omp-expand.c revision 1.2
1/* Expansion pass for OMP directives. Outlines regions of certain OMP 2 directives to separate functions, converts others into explicit calls to the 3 runtime library (libgomp) and so forth 4 5Copyright (C) 2005-2017 Free Software Foundation, Inc. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify it under 10the terms of the GNU General Public License as published by the Free 11Software Foundation; either version 3, or (at your option) any later 12version. 13 14GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15WARRANTY; without even the implied warranty of MERCHANTABILITY or 16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "memmodel.h" 27#include "backend.h" 28#include "target.h" 29#include "rtl.h" 30#include "tree.h" 31#include "gimple.h" 32#include "cfghooks.h" 33#include "tree-pass.h" 34#include "ssa.h" 35#include "optabs.h" 36#include "cgraph.h" 37#include "pretty-print.h" 38#include "diagnostic-core.h" 39#include "fold-const.h" 40#include "stor-layout.h" 41#include "cfganal.h" 42#include "internal-fn.h" 43#include "gimplify.h" 44#include "gimple-iterator.h" 45#include "gimplify-me.h" 46#include "gimple-walk.h" 47#include "tree-cfg.h" 48#include "tree-into-ssa.h" 49#include "tree-ssa.h" 50#include "splay-tree.h" 51#include "cfgloop.h" 52#include "omp-general.h" 53#include "omp-offload.h" 54#include "tree-cfgcleanup.h" 55#include "symbol-summary.h" 56#include "cilk.h" 57#include "gomp-constants.h" 58#include "gimple-pretty-print.h" 59#include "hsa-common.h" 60#include "debug.h" 61 62 63/* OMP region information. 
Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers (e.g. monotonic/nonmonotonic/simd bits).  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

/* Root of the tree of omp regions discovered in the current function.  */
static struct omp_region *root_omp_region;

/* True once the body of at least one outlined child function has been
   written to the dump file; used to decide whether dumps need flushing.  */
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.
 */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.
*/ 166 167static bool 168workshare_safe_to_combine_p (basic_block ws_entry_bb) 169{ 170 struct omp_for_data fd; 171 gimple *ws_stmt = last_stmt (ws_entry_bb); 172 173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) 174 return true; 175 176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); 177 178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); 179 180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) 181 return false; 182 if (fd.iter_type != long_integer_type_node) 183 return false; 184 185 /* FIXME. We give up too easily here. If any of these arguments 186 are not constants, they will likely involve variables that have 187 been mapped into fields of .omp_data_s for sharing with the child 188 function. With appropriate data flow, it would be possible to 189 see through this. */ 190 if (!is_gimple_min_invariant (fd.loop.n1) 191 || !is_gimple_min_invariant (fd.loop.n2) 192 || !is_gimple_min_invariant (fd.loop.step) 193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) 194 return false; 195 196 return true; 197} 198 199/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier 200 presence (SIMD_SCHEDULE). */ 201 202static tree 203omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) 204{ 205 if (!simd_schedule) 206 return chunk_size; 207 208 int vf = omp_max_vf (); 209 if (vf == 1) 210 return chunk_size; 211 212 tree type = TREE_TYPE (chunk_size); 213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, 214 build_int_cst (type, vf - 1)); 215 return fold_build2 (BIT_AND_EXPR, type, chunk_size, 216 build_int_cst (type, -vf)); 217} 218 219/* Collect additional arguments needed to emit a combined 220 parallel+workshare call. WS_STMT is the workshare directive being 221 expanded. 
 */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  /* For a loop combined into the enclosing parallel, the real
	     bounds live in the parallel's _looptemp_ clauses (first is
	     N1, second is N2), not in the loop header itself.  */
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      /* Three mandatory arguments (N1, N2, STEP) plus an optional
	 chunk size.  */
      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  /* Only GIMPLE_OMP_FOR and GIMPLE_OMP_SECTIONS can be combined with
     a parallel; anything else should not have reached here.  */
  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.
 */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION to FILE, indented by
   INDENT columns.  Siblings are dumped at the same indentation.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

/* Dump REGION's tree to stderr; for use from the debugger.  */

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

/* Dump the entire omp region tree to stderr; for use from the debugger.  */

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.
*/ 398 399static struct omp_region * 400new_omp_region (basic_block bb, enum gimple_code type, 401 struct omp_region *parent) 402{ 403 struct omp_region *region = XCNEW (struct omp_region); 404 405 region->outer = parent; 406 region->entry = bb; 407 region->type = type; 408 409 if (parent) 410 { 411 /* This is a nested region. Add it to the list of inner 412 regions in PARENT. */ 413 region->next = parent->inner; 414 parent->inner = region; 415 } 416 else 417 { 418 /* This is a toplevel region. Add it to the list of toplevel 419 regions in ROOT_OMP_REGION. */ 420 region->next = root_omp_region; 421 root_omp_region = region; 422 } 423 424 return region; 425} 426 427/* Release the memory associated with the region tree rooted at REGION. */ 428 429static void 430free_omp_region_1 (struct omp_region *region) 431{ 432 struct omp_region *i, *n; 433 434 for (i = region->inner; i ; i = n) 435 { 436 n = i->next; 437 free_omp_region_1 (i); 438 } 439 440 free (region); 441} 442 443/* Release the memory for the entire omp region tree. */ 444 445void 446omp_free_regions (void) 447{ 448 struct omp_region *r, *n; 449 for (r = root_omp_region; r ; r = n) 450 { 451 n = r->next; 452 free_omp_region_1 (r); 453 } 454 root_omp_region = NULL; 455} 456 457/* A convenience function to build an empty GIMPLE_COND with just the 458 condition. */ 459 460static gcond * 461gimple_build_cond_empty (tree cond) 462{ 463 enum tree_code pred_code; 464 tree lhs, rhs; 465 466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); 467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); 468} 469 470/* Return true if a parallel REGION is within a declare target function or 471 within a target region and is not a part of a gridified target. 
 */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  /* INDIRECT records whether a parallel region was seen between the
     start region and the enclosing target region, i.e. whether this
     parallel would be launched from inside another one.  */
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  /* A _griddim_ clause marks a gridified target; only a
	     nested (indirect) parallel needs its own HSA kernel
	     there.  */
	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  /* No enclosing target region: the parallel still needs a kernel if
     the current function itself is a declare target function.  */
  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Build the function calls to GOMP_parallel_start etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  /* Select a GOMP_parallel_loop_* entry point by schedule
	     kind; this relies on the BUILT_IN_GOMP_PARALLEL_LOOP_*
	     enumerators being laid out consecutively in schedule-kind
	     order after BUILT_IN_GOMP_PARALLEL_LOOP_STATIC.  */
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if (region->inner->sched_modifiers
		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* An explicit NUM_THREADS needs a diamond in the CFG:
	     val = cond ? num_threads : 1.  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  /* Emit the library call: GOMP_parallel* (child_fn, data, num_threads,
     [ws_args...,] flags).  */
  gsi = gsi_last_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Insert a function call whose name is FUNC_NAME with the information from
   ENTRY_STMT into the basic_block BB.
 */

static void
expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
		      vec <tree, va_gc> *ws_args)
{
  tree t, t1, t2;
  gimple_stmt_iterator gsi;
  vec <tree, va_gc> *args;

  /* For _Cilk_for the workshare args carry exactly the runtime
     function to call and the grain size.  */
  gcc_assert (vec_safe_length (ws_args) == 2);
  tree func_name = (*ws_args)[0];
  tree grain = (*ws_args)[1];

  tree clauses = gimple_omp_parallel_clauses (entry_stmt);
  tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
  gcc_assert (count != NULL_TREE);
  count = OMP_CLAUSE_OPERAND (count, 0);

  gsi = gsi_last_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));

  vec_alloc (args, 4);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (count);
  args->quick_push (grain);
  t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
			    GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  /* Compile-time-known flag bits passed to the runtime.  */
  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      /* For a taskloop the iteration space description comes from the
	 enclosing GIMPLE_OMP_FOR of kind taskloop.  */
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  /* GOMP_taskloop has no separate COND argument; fold the if
	     clause into the runtime flags instead.  */
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.
 */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  /* Walk backwards so the resulting chain preserves the vector's
     original order.  */
  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  /* -1: not yet computed; 0: none found; 1: at least one addressable
     variable is visible to spawned tasks.  Computed lazily below.  */
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      /* First scan the child function's own locals...  */
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      /* ...then walk outward through the enclosing BLOCKs up
		 to (and including) the parallel's own block.  */
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

/* Recursively apply remove_exit_barrier to every GIMPLE_OMP_PARALLEL
   region in the tree rooted at REGION, including its siblings.  */

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.
Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	 gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);

	/* Only external public calls with no body can be the library
	   entry points we are after.  */
	if (!is_gimple_call (call))
	  continue;
	tree decl = gimple_call_fndecl (call);
	if (decl == NULL_TREE
	    || !DECL_EXTERNAL (decl)
	    || !TREE_PUBLIC (decl)
	    || DECL_INITIAL (decl) != NULL)
	  continue;

	tree built_in;
	if (DECL_NAME (decl) == thr_num_id)
	  {
	    /* In #pragma omp task untied omp_get_thread_num () can change
	       during the execution of the task region.  */
	    if (untied_task)
	      continue;
	    built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	  }
	else if (DECL_NAME (decl) == num_thr_id)
	  built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	else
	  continue;

	if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
	    || gimple_call_num_args (call) != 0)
	  continue;

	if (flag_exceptions && !TREE_NOTHROW (decl))
	  continue;

	if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
	    || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
				    TREE_TYPE (TREE_TYPE (built_in))))
	  continue;

	gimple_call_set_fndecl (call, built_in);
      }
}

/* Callback for expand_omp_build_assign.
Return non-NULL if *tp needs to be 1061 regimplified. */ 1062 1063static tree 1064expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) 1065{ 1066 tree t = *tp; 1067 1068 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ 1069 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) 1070 return t; 1071 1072 if (TREE_CODE (t) == ADDR_EXPR) 1073 recompute_tree_invariant_for_addr_expr (t); 1074 1075 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 1076 return NULL_TREE; 1077} 1078 1079/* Prepend or append TO = FROM assignment before or after *GSI_P. */ 1080 1081static void 1082expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, 1083 bool after) 1084{ 1085 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); 1086 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, 1087 !after, after ? GSI_CONTINUE_LINKING 1088 : GSI_SAME_STMT); 1089 gimple *stmt = gimple_build_assign (to, from); 1090 if (after) 1091 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); 1092 else 1093 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); 1094 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) 1095 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) 1096 { 1097 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 1098 gimple_regimplify_operands (stmt, &gsi); 1099 } 1100} 1101 1102/* Expand the OpenMP parallel or task directive starting at REGION. 
 */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  /* For a task the outlined region ends at the GIMPLE_OMP_CONTINUE
     block; for a parallel it ends at the region exit.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  bool is_cilk_for
    = (flag_cilkplus
       && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
       && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
			   OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);

  if (is_cilk_for)
    /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
       and the inner statement contains the name of the built-in function
       and grain.  */
    ws_args = region->inner->ws_args;
  else if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  /* Find the .OMP_DATA_I = &.OMP_DATA_O copy assignment.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0)
			 == gimple_omp_taskreg_data_arg (entry_stmt))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      /* Make sure to generate early debug for the function before
	 outlining anything.  */
      if (! gimple_in_ssa_p (cfun))
	(*debug_hooks->early_global_decl) (cfun->decl);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Remember whether the child still needs an assembler name.  */
      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  /* Emit a library call to launch the children threads.  */
  if (is_cilk_for)
    expand_cilk_for_call (new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.
   Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      /* Gimplify this level's bounds and step, compute its iteration
	 count and fold that into TOTAL.  */
      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.
   COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  /* Peel this level's index off IVAR: the index is
	     IVAR % ITERS, the remaining outer levels continue with
	     IVAR / ITERS.  */
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.
 */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      return;
    }

  /* For the non-collapsed loops of an ordered(N) nest: if any bound
     pair folds to a constant-false condition, there are no iterations
     at all, so zero every remaining count up front.  */
  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  /* The condition isn't known true at compile time; emit a
	     runtime test branching to the zero-iterations block.  */
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = REG_BR_PROB_BASE - ne->probability;
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.  */

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
	?
	  gimple_omp_taskreg_clauses (inner_stmt)
	: gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  /* Not combined: decompose the linear iteration number STARTVAR into
     the individual loop iteration variables by repeated div/mod,
     starting with the innermost loop.  */
  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0)
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      t = fold_convert (itype, t);
      t = fold_build2 (MULT_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].step));
      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].n1, t);
      else
	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
      t = force_gimple_operand_gsi (gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
      if (i != 0)
	{
	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (tem, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
}

/* Helper function for expand_omp_for_*.  Generate code like:
	L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
	L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
	L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;  */

static basic_block
extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
			     basic_block body_bb)
{
  basic_block last_bb, bb, collapse_bb = NULL;
  int i;
  gimple_stmt_iterator gsi;
  edge e;
  tree t;
  gimple *stmt;

  last_bb = cont_bb;
  /* Build the update blocks from the innermost loop outwards.  */
  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v);

      bb = create_empty_bb (last_bb);
      add_bb_to_loop (bb, last_bb->loop_father);
      gsi = gsi_start_bb (bb);

      if (i < fd->collapse - 1)
	{
	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
	  e->probability = REG_BR_PROB_BASE / 8;

	  /* Reset the next-inner loop's variable to its lower bound.  */
	  t = fd->loops[i + 1].n1;
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (fd->loops[i + 1].v)
					&& TREE_ADDRESSABLE (fd->loops[i
								       + 1].v),
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
      else
	collapse_bb = bb;

      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);

      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
      else
	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      stmt =
	gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      /* All but the outermost loop get a continuation test with a TRUE
	 edge back to BODY_BB; the outermost simply falls through.  */
      if (i > 0)
	{
	  t = fd->loops[i].n2;
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree v = fd->loops[i].v;
	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
					  false, GSI_CONTINUE_LINKING);
	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
	  stmt = gimple_build_cond_empty (t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
			    expand_omp_regimplify_p, NULL, NULL))
	    gimple_regimplify_operands (stmt, &gsi);
	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
	  e->probability = REG_BR_PROB_BASE * 7 / 8;
	}
      else
	make_edge (bb, body_bb, EDGE_FALLTHRU);
      last_bb = bb;
    }

  return collapse_bb;
}

/* Expand #pragma omp ordered depend(source).  */

static void
expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			   tree *counts, location_t loc)
{
  /* Emit a call to GOMP_doacross_post (or the unsigned long long
     variant, depending on FD's iteration type), passing the address
     of the iteration counts array.  */
  enum built_in_function source_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
  gimple *g
    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
			 build_fold_addr_expr (counts[fd->ordered]));
  gimple_set_location (g, loc);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
}

/* Expand a single depend from #pragma omp ordered depend(sink:...).
*/ 1996 1997static void 1998expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 1999 tree *counts, tree c, location_t loc) 2000{ 2001 auto_vec<tree, 10> args; 2002 enum built_in_function sink_ix 2003 = fd->iter_type == long_integer_type_node 2004 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; 2005 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; 2006 int i; 2007 gimple_stmt_iterator gsi2 = *gsi; 2008 bool warned_step = false; 2009 2010 for (i = 0; i < fd->ordered; i++) 2011 { 2012 tree step = NULL_TREE; 2013 off = TREE_PURPOSE (deps); 2014 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2015 { 2016 step = TREE_OPERAND (off, 1); 2017 off = TREE_OPERAND (off, 0); 2018 } 2019 if (!integer_zerop (off)) 2020 { 2021 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2022 || fd->loops[i].cond_code == GT_EXPR); 2023 bool forward = fd->loops[i].cond_code == LT_EXPR; 2024 if (step) 2025 { 2026 /* Non-simple Fortran DO loops. If step is variable, 2027 we don't know at compile even the direction, so can't 2028 warn. */ 2029 if (TREE_CODE (step) != INTEGER_CST) 2030 break; 2031 forward = tree_int_cst_sgn (step) != -1; 2032 } 2033 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2034 warning_at (loc, 0, "%<depend(sink)%> clause waiting for " 2035 "lexically later iteration"); 2036 break; 2037 } 2038 deps = TREE_CHAIN (deps); 2039 } 2040 /* If all offsets corresponding to the collapsed loops are zero, 2041 this depend clause can be ignored. FIXME: but there is still a 2042 flush needed. We need to emit one __sync_synchronize () for it 2043 though (perhaps conditionally)? Solve this together with the 2044 conservative dependence folding optimization. 
2045 if (i >= fd->collapse) 2046 return; */ 2047 2048 deps = OMP_CLAUSE_DECL (c); 2049 gsi_prev (&gsi2); 2050 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); 2051 edge e2 = split_block_after_labels (e1->dest); 2052 2053 gsi2 = gsi_after_labels (e1->dest); 2054 *gsi = gsi_last_bb (e1->src); 2055 for (i = 0; i < fd->ordered; i++) 2056 { 2057 tree itype = TREE_TYPE (fd->loops[i].v); 2058 tree step = NULL_TREE; 2059 tree orig_off = NULL_TREE; 2060 if (POINTER_TYPE_P (itype)) 2061 itype = sizetype; 2062 if (i) 2063 deps = TREE_CHAIN (deps); 2064 off = TREE_PURPOSE (deps); 2065 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2066 { 2067 step = TREE_OPERAND (off, 1); 2068 off = TREE_OPERAND (off, 0); 2069 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2070 && integer_onep (fd->loops[i].step) 2071 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); 2072 } 2073 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step); 2074 if (step) 2075 { 2076 off = fold_convert_loc (loc, itype, off); 2077 orig_off = off; 2078 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2079 } 2080 2081 if (integer_zerop (off)) 2082 t = boolean_true_node; 2083 else 2084 { 2085 tree a; 2086 tree co = fold_convert_loc (loc, itype, off); 2087 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 2088 { 2089 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2090 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); 2091 a = fold_build2_loc (loc, POINTER_PLUS_EXPR, 2092 TREE_TYPE (fd->loops[i].v), fd->loops[i].v, 2093 co); 2094 } 2095 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2096 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2097 fd->loops[i].v, co); 2098 else 2099 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 2100 fd->loops[i].v, co); 2101 if (step) 2102 { 2103 tree t1, t2; 2104 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2105 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2106 fd->loops[i].n1); 2107 else 2108 t1 = fold_build2_loc (loc, 
LT_EXPR, boolean_type_node, a, 2109 fd->loops[i].n2); 2110 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2111 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2112 fd->loops[i].n2); 2113 else 2114 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2115 fd->loops[i].n1); 2116 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, 2117 step, build_int_cst (TREE_TYPE (step), 0)); 2118 if (TREE_CODE (step) != INTEGER_CST) 2119 { 2120 t1 = unshare_expr (t1); 2121 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, 2122 false, GSI_CONTINUE_LINKING); 2123 t2 = unshare_expr (t2); 2124 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, 2125 false, GSI_CONTINUE_LINKING); 2126 } 2127 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, 2128 t, t2, t1); 2129 } 2130 else if (fd->loops[i].cond_code == LT_EXPR) 2131 { 2132 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2133 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2134 fd->loops[i].n1); 2135 else 2136 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2137 fd->loops[i].n2); 2138 } 2139 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2140 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, 2141 fd->loops[i].n2); 2142 else 2143 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, 2144 fd->loops[i].n1); 2145 } 2146 if (cond) 2147 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); 2148 else 2149 cond = t; 2150 2151 off = fold_convert_loc (loc, itype, off); 2152 2153 if (step 2154 || (fd->loops[i].cond_code == LT_EXPR 2155 ? !integer_onep (fd->loops[i].step) 2156 : !integer_minus_onep (fd->loops[i].step))) 2157 { 2158 if (step == NULL_TREE 2159 && TYPE_UNSIGNED (itype) 2160 && fd->loops[i].cond_code == GT_EXPR) 2161 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, 2162 fold_build1_loc (loc, NEGATE_EXPR, itype, 2163 s)); 2164 else 2165 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, 2166 orig_off ? 
orig_off : off, s); 2167 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, 2168 build_int_cst (itype, 0)); 2169 if (integer_zerop (t) && !warned_step) 2170 { 2171 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never " 2172 "in the iteration space"); 2173 warned_step = true; 2174 } 2175 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, 2176 cond, t); 2177 } 2178 2179 if (i <= fd->collapse - 1 && fd->collapse > 1) 2180 t = fd->loop.v; 2181 else if (counts[i]) 2182 t = counts[i]; 2183 else 2184 { 2185 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2186 fd->loops[i].v, fd->loops[i].n1); 2187 t = fold_convert_loc (loc, fd->iter_type, t); 2188 } 2189 if (step) 2190 /* We have divided off by step already earlier. */; 2191 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 2192 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, 2193 fold_build1_loc (loc, NEGATE_EXPR, itype, 2194 s)); 2195 else 2196 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2197 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2198 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); 2199 off = fold_convert_loc (loc, fd->iter_type, off); 2200 if (i <= fd->collapse - 1 && fd->collapse > 1) 2201 { 2202 if (i) 2203 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, 2204 off); 2205 if (i < fd->collapse - 1) 2206 { 2207 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, 2208 counts[i]); 2209 continue; 2210 } 2211 } 2212 off = unshare_expr (off); 2213 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); 2214 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2215 true, GSI_SAME_STMT); 2216 args.safe_push (t); 2217 } 2218 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); 2219 gimple_set_location (g, loc); 2220 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 2221 2222 cond = unshare_expr (cond); 2223 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, 2224 
GSI_CONTINUE_LINKING); 2225 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 2226 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 2227 e3->probability = REG_BR_PROB_BASE / 8; 2228 e1->probability = REG_BR_PROB_BASE - e3->probability; 2229 e1->flags = EDGE_TRUE_VALUE; 2230 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 2231 2232 *gsi = gsi_after_labels (e2->dest); 2233} 2234 2235/* Expand all #pragma omp ordered depend(source) and 2236 #pragma omp ordered depend(sink:...) constructs in the current 2237 #pragma omp for ordered(n) region. */ 2238 2239static void 2240expand_omp_ordered_source_sink (struct omp_region *region, 2241 struct omp_for_data *fd, tree *counts, 2242 basic_block cont_bb) 2243{ 2244 struct omp_region *inner; 2245 int i; 2246 for (i = fd->collapse - 1; i < fd->ordered; i++) 2247 if (i == fd->collapse - 1 && fd->collapse > 1) 2248 counts[i] = NULL_TREE; 2249 else if (i >= fd->collapse && !cont_bb) 2250 counts[i] = build_zero_cst (fd->iter_type); 2251 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 2252 && integer_onep (fd->loops[i].step)) 2253 counts[i] = NULL_TREE; 2254 else 2255 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 2256 tree atype 2257 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 2258 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 2259 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 2260 2261 for (inner = region->inner; inner; inner = inner->next) 2262 if (inner->type == GIMPLE_OMP_ORDERED) 2263 { 2264 gomp_ordered *ord_stmt = inner->ord_stmt; 2265 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 2266 location_t loc = gimple_location (ord_stmt); 2267 tree c; 2268 for (c = gimple_omp_ordered_clauses (ord_stmt); 2269 c; c = OMP_CLAUSE_CHAIN (c)) 2270 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 2271 break; 2272 if (c) 2273 expand_omp_ordered_source (&gsi, fd, counts, loc); 2274 for (c = gimple_omp_ordered_clauses (ord_stmt); 
2275 c; c = OMP_CLAUSE_CHAIN (c)) 2276 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 2277 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 2278 gsi_remove (&gsi, true); 2279 } 2280} 2281 2282/* Wrap the body into fd->ordered - fd->collapse loops that aren't 2283 collapsed. */ 2284 2285static basic_block 2286expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 2287 basic_block cont_bb, basic_block body_bb, 2288 bool ordered_lastprivate) 2289{ 2290 if (fd->ordered == fd->collapse) 2291 return cont_bb; 2292 2293 if (!cont_bb) 2294 { 2295 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2296 for (int i = fd->collapse; i < fd->ordered; i++) 2297 { 2298 tree type = TREE_TYPE (fd->loops[i].v); 2299 tree n1 = fold_convert (type, fd->loops[i].n1); 2300 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 2301 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2302 size_int (i - fd->collapse + 1), 2303 NULL_TREE, NULL_TREE); 2304 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2305 } 2306 return NULL; 2307 } 2308 2309 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 2310 { 2311 tree t, type = TREE_TYPE (fd->loops[i].v); 2312 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2313 expand_omp_build_assign (&gsi, fd->loops[i].v, 2314 fold_convert (type, fd->loops[i].n1)); 2315 if (counts[i]) 2316 expand_omp_build_assign (&gsi, counts[i], 2317 build_zero_cst (fd->iter_type)); 2318 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2319 size_int (i - fd->collapse + 1), 2320 NULL_TREE, NULL_TREE); 2321 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2322 if (!gsi_end_p (gsi)) 2323 gsi_prev (&gsi); 2324 else 2325 gsi = gsi_last_bb (body_bb); 2326 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 2327 basic_block new_body = e1->dest; 2328 if (body_bb == cont_bb) 2329 cont_bb = new_body; 2330 edge e2 = NULL; 2331 basic_block new_header; 2332 if (EDGE_COUNT 
(cont_bb->preds) > 0) 2333 { 2334 gsi = gsi_last_bb (cont_bb); 2335 if (POINTER_TYPE_P (type)) 2336 t = fold_build_pointer_plus (fd->loops[i].v, 2337 fold_convert (sizetype, 2338 fd->loops[i].step)); 2339 else 2340 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 2341 fold_convert (type, fd->loops[i].step)); 2342 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 2343 if (counts[i]) 2344 { 2345 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 2346 build_int_cst (fd->iter_type, 1)); 2347 expand_omp_build_assign (&gsi, counts[i], t); 2348 t = counts[i]; 2349 } 2350 else 2351 { 2352 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2353 fd->loops[i].v, fd->loops[i].n1); 2354 t = fold_convert (fd->iter_type, t); 2355 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2356 true, GSI_SAME_STMT); 2357 } 2358 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2359 size_int (i - fd->collapse + 1), 2360 NULL_TREE, NULL_TREE); 2361 expand_omp_build_assign (&gsi, aref, t); 2362 gsi_prev (&gsi); 2363 e2 = split_block (cont_bb, gsi_stmt (gsi)); 2364 new_header = e2->dest; 2365 } 2366 else 2367 new_header = cont_bb; 2368 gsi = gsi_after_labels (new_header); 2369 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 2370 true, GSI_SAME_STMT); 2371 tree n2 2372 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 2373 true, NULL_TREE, true, GSI_SAME_STMT); 2374 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 2375 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 2376 edge e3 = split_block (new_header, gsi_stmt (gsi)); 2377 cont_bb = e3->dest; 2378 remove_edge (e1); 2379 make_edge (body_bb, new_header, EDGE_FALLTHRU); 2380 e3->flags = EDGE_FALSE_VALUE; 2381 e3->probability = REG_BR_PROB_BASE / 8; 2382 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 2383 e1->probability = REG_BR_PROB_BASE - e3->probability; 2384 2385 set_immediate_dominator (CDI_DOMINATORS, new_header, 
body_bb); 2386 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 2387 2388 if (e2) 2389 { 2390 struct loop *loop = alloc_loop (); 2391 loop->header = new_header; 2392 loop->latch = e2->src; 2393 add_loop (loop, body_bb->loop_father); 2394 } 2395 } 2396 2397 /* If there are any lastprivate clauses and it is possible some loops 2398 might have zero iterations, ensure all the decls are initialized, 2399 otherwise we could crash evaluating C++ class iterators with lastprivate 2400 clauses. */ 2401 bool need_inits = false; 2402 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 2403 if (need_inits) 2404 { 2405 tree type = TREE_TYPE (fd->loops[i].v); 2406 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2407 expand_omp_build_assign (&gsi, fd->loops[i].v, 2408 fold_convert (type, fd->loops[i].n1)); 2409 } 2410 else 2411 { 2412 tree type = TREE_TYPE (fd->loops[i].v); 2413 tree this_cond = fold_build2 (fd->loops[i].cond_code, 2414 boolean_type_node, 2415 fold_convert (type, fd->loops[i].n1), 2416 fold_convert (type, fd->loops[i].n2)); 2417 if (!integer_onep (this_cond)) 2418 need_inits = true; 2419 } 2420 2421 return cont_bb; 2422} 2423 2424/* A subroutine of expand_omp_for. Generate code for a parallel 2425 loop with any schedule. Given parameters: 2426 2427 for (V = N1; V cond N2; V += STEP) BODY; 2428 2429 where COND is "<" or ">", we generate pseudocode 2430 2431 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 2432 if (more) goto L0; else goto L3; 2433 L0: 2434 V = istart0; 2435 iend = iend0; 2436 L1: 2437 BODY; 2438 V += STEP; 2439 if (V cond iend) goto L1; else goto L2; 2440 L2: 2441 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2442 L3: 2443 2444 If this is a combined omp parallel loop, instead of the call to 2445 GOMP_loop_foo_start, we call GOMP_loop_foo_next. 
2446 If this is gimple_omp_for_combined_p loop, then instead of assigning 2447 V and iend in L0 we assign the first two _looptemp_ clause decls of the 2448 inner GIMPLE_OMP_FOR and V += STEP; and 2449 if (V cond iend) goto L1; else goto L2; are removed. 2450 2451 For collapsed loops, given parameters: 2452 collapse(3) 2453 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 2454 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 2455 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 2456 BODY; 2457 2458 we generate pseudocode 2459 2460 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 2461 if (cond3 is <) 2462 adj = STEP3 - 1; 2463 else 2464 adj = STEP3 + 1; 2465 count3 = (adj + N32 - N31) / STEP3; 2466 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 2467 if (cond2 is <) 2468 adj = STEP2 - 1; 2469 else 2470 adj = STEP2 + 1; 2471 count2 = (adj + N22 - N21) / STEP2; 2472 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 2473 if (cond1 is <) 2474 adj = STEP1 - 1; 2475 else 2476 adj = STEP1 + 1; 2477 count1 = (adj + N12 - N11) / STEP1; 2478 count = count1 * count2 * count3; 2479 goto Z1; 2480 Z0: 2481 count = 0; 2482 Z1: 2483 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 2484 if (more) goto L0; else goto L3; 2485 L0: 2486 V = istart0; 2487 T = V; 2488 V3 = N31 + (T % count3) * STEP3; 2489 T = T / count3; 2490 V2 = N21 + (T % count2) * STEP2; 2491 T = T / count2; 2492 V1 = N11 + T * STEP1; 2493 iend = iend0; 2494 L1: 2495 BODY; 2496 V += 1; 2497 if (V < iend) goto L10; else goto L2; 2498 L10: 2499 V3 += STEP3; 2500 if (V3 cond3 N32) goto L1; else goto L11; 2501 L11: 2502 V3 = N31; 2503 V2 += STEP2; 2504 if (V2 cond2 N22) goto L1; else goto L12; 2505 L12: 2506 V2 = N21; 2507 V1 += STEP1; 2508 goto L1; 2509 L2: 2510 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2511 L3: 2512 2513 */ 2514 2515static void 2516expand_omp_for_generic (struct omp_region *region, 2517 struct omp_for_data *fd, 2518 enum built_in_function start_fn, 2519 enum built_in_function 
next_fn, 2520 gimple *inner_stmt) 2521{ 2522 tree type, istart0, iend0, iend; 2523 tree t, vmain, vback, bias = NULL_TREE; 2524 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; 2525 basic_block l2_bb = NULL, l3_bb = NULL; 2526 gimple_stmt_iterator gsi; 2527 gassign *assign_stmt; 2528 bool in_combined_parallel = is_combined_parallel (region); 2529 bool broken_loop = region->cont == NULL; 2530 edge e, ne; 2531 tree *counts = NULL; 2532 int i; 2533 bool ordered_lastprivate = false; 2534 2535 gcc_assert (!broken_loop || !in_combined_parallel); 2536 gcc_assert (fd->iter_type == long_integer_type_node 2537 || !in_combined_parallel); 2538 2539 entry_bb = region->entry; 2540 cont_bb = region->cont; 2541 collapse_bb = NULL; 2542 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 2543 gcc_assert (broken_loop 2544 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 2545 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 2546 l1_bb = single_succ (l0_bb); 2547 if (!broken_loop) 2548 { 2549 l2_bb = create_empty_bb (cont_bb); 2550 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb 2551 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest 2552 == l1_bb)); 2553 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 2554 } 2555 else 2556 l2_bb = NULL; 2557 l3_bb = BRANCH_EDGE (entry_bb)->dest; 2558 exit_bb = region->exit; 2559 2560 gsi = gsi_last_bb (entry_bb); 2561 2562 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 2563 if (fd->ordered 2564 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), 2565 OMP_CLAUSE_LASTPRIVATE)) 2566 ordered_lastprivate = false; 2567 if (fd->collapse > 1 || fd->ordered) 2568 { 2569 int first_zero_iter1 = -1, first_zero_iter2 = -1; 2570 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; 2571 2572 counts = XALLOCAVEC (tree, fd->ordered ? 
fd->ordered + 1 : fd->collapse); 2573 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 2574 zero_iter1_bb, first_zero_iter1, 2575 zero_iter2_bb, first_zero_iter2, l2_dom_bb); 2576 2577 if (zero_iter1_bb) 2578 { 2579 /* Some counts[i] vars might be uninitialized if 2580 some loop has zero iterations. But the body shouldn't 2581 be executed in that case, so just avoid uninit warnings. */ 2582 for (i = first_zero_iter1; 2583 i < (fd->ordered ? fd->ordered : fd->collapse); i++) 2584 if (SSA_VAR_P (counts[i])) 2585 TREE_NO_WARNING (counts[i]) = 1; 2586 gsi_prev (&gsi); 2587 e = split_block (entry_bb, gsi_stmt (gsi)); 2588 entry_bb = e->dest; 2589 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); 2590 gsi = gsi_last_bb (entry_bb); 2591 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2592 get_immediate_dominator (CDI_DOMINATORS, 2593 zero_iter1_bb)); 2594 } 2595 if (zero_iter2_bb) 2596 { 2597 /* Some counts[i] vars might be uninitialized if 2598 some loop has zero iterations. But the body shouldn't 2599 be executed in that case, so just avoid uninit warnings. 
*/ 2600 for (i = first_zero_iter2; i < fd->ordered; i++) 2601 if (SSA_VAR_P (counts[i])) 2602 TREE_NO_WARNING (counts[i]) = 1; 2603 if (zero_iter1_bb) 2604 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2605 else 2606 { 2607 gsi_prev (&gsi); 2608 e = split_block (entry_bb, gsi_stmt (gsi)); 2609 entry_bb = e->dest; 2610 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2611 gsi = gsi_last_bb (entry_bb); 2612 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2613 get_immediate_dominator 2614 (CDI_DOMINATORS, zero_iter2_bb)); 2615 } 2616 } 2617 if (fd->collapse == 1) 2618 { 2619 counts[0] = fd->loop.n2; 2620 fd->loop = fd->loops[0]; 2621 } 2622 } 2623 2624 type = TREE_TYPE (fd->loop.v); 2625 istart0 = create_tmp_var (fd->iter_type, ".istart0"); 2626 iend0 = create_tmp_var (fd->iter_type, ".iend0"); 2627 TREE_ADDRESSABLE (istart0) = 1; 2628 TREE_ADDRESSABLE (iend0) = 1; 2629 2630 /* See if we need to bias by LLONG_MIN. */ 2631 if (fd->iter_type == long_long_unsigned_type_node 2632 && TREE_CODE (type) == INTEGER_TYPE 2633 && !TYPE_UNSIGNED (type) 2634 && fd->ordered == 0) 2635 { 2636 tree n1, n2; 2637 2638 if (fd->loop.cond_code == LT_EXPR) 2639 { 2640 n1 = fd->loop.n1; 2641 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 2642 } 2643 else 2644 { 2645 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 2646 n2 = fd->loop.n1; 2647 } 2648 if (TREE_CODE (n1) != INTEGER_CST 2649 || TREE_CODE (n2) != INTEGER_CST 2650 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 2651 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 2652 } 2653 2654 gimple_stmt_iterator gsif = gsi; 2655 gsi_prev (&gsif); 2656 2657 tree arr = NULL_TREE; 2658 if (in_combined_parallel) 2659 { 2660 gcc_assert (fd->ordered == 0); 2661 /* In a combined parallel loop, emit a call to 2662 GOMP_loop_foo_next. 
*/ 2663 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 2664 build_fold_addr_expr (istart0), 2665 build_fold_addr_expr (iend0)); 2666 } 2667 else 2668 { 2669 tree t0, t1, t2, t3, t4; 2670 /* If this is not a combined parallel loop, emit a call to 2671 GOMP_loop_foo_start in ENTRY_BB. */ 2672 t4 = build_fold_addr_expr (iend0); 2673 t3 = build_fold_addr_expr (istart0); 2674 if (fd->ordered) 2675 { 2676 t0 = build_int_cst (unsigned_type_node, 2677 fd->ordered - fd->collapse + 1); 2678 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 2679 fd->ordered 2680 - fd->collapse + 1), 2681 ".omp_counts"); 2682 DECL_NAMELESS (arr) = 1; 2683 TREE_ADDRESSABLE (arr) = 1; 2684 TREE_STATIC (arr) = 1; 2685 vec<constructor_elt, va_gc> *v; 2686 vec_alloc (v, fd->ordered - fd->collapse + 1); 2687 int idx; 2688 2689 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 2690 { 2691 tree c; 2692 if (idx == 0 && fd->collapse > 1) 2693 c = fd->loop.n2; 2694 else 2695 c = counts[idx + fd->collapse - 1]; 2696 tree purpose = size_int (idx); 2697 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 2698 if (TREE_CODE (c) != INTEGER_CST) 2699 TREE_STATIC (arr) = 0; 2700 } 2701 2702 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 2703 if (!TREE_STATIC (arr)) 2704 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 2705 void_type_node, arr), 2706 true, NULL_TREE, true, GSI_SAME_STMT); 2707 t1 = build_fold_addr_expr (arr); 2708 t2 = NULL_TREE; 2709 } 2710 else 2711 { 2712 t2 = fold_convert (fd->iter_type, fd->loop.step); 2713 t1 = fd->loop.n2; 2714 t0 = fd->loop.n1; 2715 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 2716 { 2717 tree innerc 2718 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2719 OMP_CLAUSE__LOOPTEMP_); 2720 gcc_assert (innerc); 2721 t0 = OMP_CLAUSE_DECL (innerc); 2722 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2723 OMP_CLAUSE__LOOPTEMP_); 2724 gcc_assert (innerc); 2725 t1 = OMP_CLAUSE_DECL (innerc); 2726 } 2727 if (POINTER_TYPE_P 
(TREE_TYPE (t0)) 2728 && TYPE_PRECISION (TREE_TYPE (t0)) 2729 != TYPE_PRECISION (fd->iter_type)) 2730 { 2731 /* Avoid casting pointers to integer of a different size. */ 2732 tree itype = signed_type_for (type); 2733 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 2734 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 2735 } 2736 else 2737 { 2738 t1 = fold_convert (fd->iter_type, t1); 2739 t0 = fold_convert (fd->iter_type, t0); 2740 } 2741 if (bias) 2742 { 2743 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 2744 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 2745 } 2746 } 2747 if (fd->iter_type == long_integer_type_node || fd->ordered) 2748 { 2749 if (fd->chunk_size) 2750 { 2751 t = fold_convert (fd->iter_type, fd->chunk_size); 2752 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2753 if (fd->ordered) 2754 t = build_call_expr (builtin_decl_explicit (start_fn), 2755 5, t0, t1, t, t3, t4); 2756 else 2757 t = build_call_expr (builtin_decl_explicit (start_fn), 2758 6, t0, t1, t2, t, t3, t4); 2759 } 2760 else if (fd->ordered) 2761 t = build_call_expr (builtin_decl_explicit (start_fn), 2762 4, t0, t1, t3, t4); 2763 else 2764 t = build_call_expr (builtin_decl_explicit (start_fn), 2765 5, t0, t1, t2, t3, t4); 2766 } 2767 else 2768 { 2769 tree t5; 2770 tree c_bool_type; 2771 tree bfn_decl; 2772 2773 /* The GOMP_loop_ull_*start functions have additional boolean 2774 argument, true for < loops and false for > loops. 2775 In Fortran, the C bool type can be different from 2776 boolean_type_node. */ 2777 bfn_decl = builtin_decl_explicit (start_fn); 2778 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 2779 t5 = build_int_cst (c_bool_type, 2780 fd->loop.cond_code == LT_EXPR ? 
1 : 0); 2781 if (fd->chunk_size) 2782 { 2783 tree bfn_decl = builtin_decl_explicit (start_fn); 2784 t = fold_convert (fd->iter_type, fd->chunk_size); 2785 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2786 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 2787 } 2788 else 2789 t = build_call_expr (builtin_decl_explicit (start_fn), 2790 6, t5, t0, t1, t2, t3, t4); 2791 } 2792 } 2793 if (TREE_TYPE (t) != boolean_type_node) 2794 t = fold_build2 (NE_EXPR, boolean_type_node, 2795 t, build_int_cst (TREE_TYPE (t), 0)); 2796 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2797 true, GSI_SAME_STMT); 2798 if (arr && !TREE_STATIC (arr)) 2799 { 2800 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 2801 TREE_THIS_VOLATILE (clobber) = 1; 2802 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 2803 GSI_SAME_STMT); 2804 } 2805 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 2806 2807 /* Remove the GIMPLE_OMP_FOR statement. */ 2808 gsi_remove (&gsi, true); 2809 2810 if (gsi_end_p (gsif)) 2811 gsif = gsi_after_labels (gsi_bb (gsif)); 2812 gsi_next (&gsif); 2813 2814 /* Iteration setup for sequential loop goes in L0_BB. 
*/ 2815 tree startvar = fd->loop.v; 2816 tree endvar = NULL_TREE; 2817 2818 if (gimple_omp_for_combined_p (fd->for_stmt)) 2819 { 2820 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 2821 && gimple_omp_for_kind (inner_stmt) 2822 == GF_OMP_FOR_KIND_SIMD); 2823 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 2824 OMP_CLAUSE__LOOPTEMP_); 2825 gcc_assert (innerc); 2826 startvar = OMP_CLAUSE_DECL (innerc); 2827 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2828 OMP_CLAUSE__LOOPTEMP_); 2829 gcc_assert (innerc); 2830 endvar = OMP_CLAUSE_DECL (innerc); 2831 } 2832 2833 gsi = gsi_start_bb (l0_bb); 2834 t = istart0; 2835 if (fd->ordered && fd->collapse == 1) 2836 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 2837 fold_convert (fd->iter_type, fd->loop.step)); 2838 else if (bias) 2839 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 2840 if (fd->ordered && fd->collapse == 1) 2841 { 2842 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2843 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 2844 fd->loop.n1, fold_convert (sizetype, t)); 2845 else 2846 { 2847 t = fold_convert (TREE_TYPE (startvar), t); 2848 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 2849 fd->loop.n1, t); 2850 } 2851 } 2852 else 2853 { 2854 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2855 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 2856 t = fold_convert (TREE_TYPE (startvar), t); 2857 } 2858 t = force_gimple_operand_gsi (&gsi, t, 2859 DECL_P (startvar) 2860 && TREE_ADDRESSABLE (startvar), 2861 NULL_TREE, false, GSI_CONTINUE_LINKING); 2862 assign_stmt = gimple_build_assign (startvar, t); 2863 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2864 2865 t = iend0; 2866 if (fd->ordered && fd->collapse == 1) 2867 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 2868 fold_convert (fd->iter_type, fd->loop.step)); 2869 else if (bias) 2870 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 2871 if (fd->ordered && fd->collapse == 1) 2872 { 2873 if 
(POINTER_TYPE_P (TREE_TYPE (startvar))) 2874 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 2875 fd->loop.n1, fold_convert (sizetype, t)); 2876 else 2877 { 2878 t = fold_convert (TREE_TYPE (startvar), t); 2879 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 2880 fd->loop.n1, t); 2881 } 2882 } 2883 else 2884 { 2885 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2886 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 2887 t = fold_convert (TREE_TYPE (startvar), t); 2888 } 2889 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2890 false, GSI_CONTINUE_LINKING); 2891 if (endvar) 2892 { 2893 assign_stmt = gimple_build_assign (endvar, iend); 2894 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2895 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 2896 assign_stmt = gimple_build_assign (fd->loop.v, iend); 2897 else 2898 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 2899 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2900 } 2901 /* Handle linear clause adjustments. 
*/ 2902 tree itercnt = NULL_TREE; 2903 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 2904 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 2905 c; c = OMP_CLAUSE_CHAIN (c)) 2906 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 2907 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 2908 { 2909 tree d = OMP_CLAUSE_DECL (c); 2910 bool is_ref = omp_is_reference (d); 2911 tree t = d, a, dest; 2912 if (is_ref) 2913 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 2914 tree type = TREE_TYPE (t); 2915 if (POINTER_TYPE_P (type)) 2916 type = sizetype; 2917 dest = unshare_expr (t); 2918 tree v = create_tmp_var (TREE_TYPE (t), NULL); 2919 expand_omp_build_assign (&gsif, v, t); 2920 if (itercnt == NULL_TREE) 2921 { 2922 itercnt = startvar; 2923 tree n1 = fd->loop.n1; 2924 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 2925 { 2926 itercnt 2927 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 2928 itercnt); 2929 n1 = fold_convert (TREE_TYPE (itercnt), n1); 2930 } 2931 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 2932 itercnt, n1); 2933 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 2934 itercnt, fd->loop.step); 2935 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 2936 NULL_TREE, false, 2937 GSI_CONTINUE_LINKING); 2938 } 2939 a = fold_build2 (MULT_EXPR, type, 2940 fold_convert (type, itercnt), 2941 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 2942 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 2943 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 2944 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2945 false, GSI_CONTINUE_LINKING); 2946 assign_stmt = gimple_build_assign (dest, t); 2947 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2948 } 2949 if (fd->collapse > 1) 2950 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 2951 2952 if (fd->ordered) 2953 { 2954 /* Until now, counts array contained number of iterations or 2955 variable containing it for ith loop. 
From now on, we need 2956 those counts only for collapsed loops, and only for the 2nd 2957 till the last collapsed one. Move those one element earlier, 2958 we'll use counts[fd->collapse - 1] for the first source/sink 2959 iteration counter and so on and counts[fd->ordered] 2960 as the array holding the current counter values for 2961 depend(source). */ 2962 if (fd->collapse > 1) 2963 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 2964 if (broken_loop) 2965 { 2966 int i; 2967 for (i = fd->collapse; i < fd->ordered; i++) 2968 { 2969 tree type = TREE_TYPE (fd->loops[i].v); 2970 tree this_cond 2971 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 2972 fold_convert (type, fd->loops[i].n1), 2973 fold_convert (type, fd->loops[i].n2)); 2974 if (!integer_onep (this_cond)) 2975 break; 2976 } 2977 if (i < fd->ordered) 2978 { 2979 cont_bb 2980 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 2981 add_bb_to_loop (cont_bb, l1_bb->loop_father); 2982 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 2983 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 2984 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 2985 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 2986 make_edge (cont_bb, l1_bb, 0); 2987 l2_bb = create_empty_bb (cont_bb); 2988 broken_loop = false; 2989 } 2990 } 2991 expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 2992 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 2993 ordered_lastprivate); 2994 if (counts[fd->collapse - 1]) 2995 { 2996 gcc_assert (fd->collapse == 1); 2997 gsi = gsi_last_bb (l0_bb); 2998 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 2999 istart0, true); 3000 gsi = gsi_last_bb (cont_bb); 3001 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], 3002 build_int_cst (fd->iter_type, 1)); 3003 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 3004 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3005 size_zero_node, 
NULL_TREE, NULL_TREE); 3006 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 3007 t = counts[fd->collapse - 1]; 3008 } 3009 else if (fd->collapse > 1) 3010 t = fd->loop.v; 3011 else 3012 { 3013 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3014 fd->loops[0].v, fd->loops[0].n1); 3015 t = fold_convert (fd->iter_type, t); 3016 } 3017 gsi = gsi_last_bb (l0_bb); 3018 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3019 size_zero_node, NULL_TREE, NULL_TREE); 3020 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3021 false, GSI_CONTINUE_LINKING); 3022 expand_omp_build_assign (&gsi, aref, t, true); 3023 } 3024 3025 if (!broken_loop) 3026 { 3027 /* Code to control the increment and predicate for the sequential 3028 loop goes in the CONT_BB. */ 3029 gsi = gsi_last_bb (cont_bb); 3030 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3031 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3032 vmain = gimple_omp_continue_control_use (cont_stmt); 3033 vback = gimple_omp_continue_control_def (cont_stmt); 3034 3035 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3036 { 3037 if (POINTER_TYPE_P (type)) 3038 t = fold_build_pointer_plus (vmain, fd->loop.step); 3039 else 3040 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 3041 t = force_gimple_operand_gsi (&gsi, t, 3042 DECL_P (vback) 3043 && TREE_ADDRESSABLE (vback), 3044 NULL_TREE, true, GSI_SAME_STMT); 3045 assign_stmt = gimple_build_assign (vback, t); 3046 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3047 3048 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 3049 { 3050 tree tem; 3051 if (fd->collapse > 1) 3052 tem = fd->loop.v; 3053 else 3054 { 3055 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3056 fd->loops[0].v, fd->loops[0].n1); 3057 tem = fold_convert (fd->iter_type, tem); 3058 } 3059 tree aref = build4 (ARRAY_REF, fd->iter_type, 3060 counts[fd->ordered], size_zero_node, 3061 NULL_TREE, NULL_TREE); 3062 tem 
= force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, 3063 true, GSI_SAME_STMT); 3064 expand_omp_build_assign (&gsi, aref, tem); 3065 } 3066 3067 t = build2 (fd->loop.cond_code, boolean_type_node, 3068 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 3069 iend); 3070 gcond *cond_stmt = gimple_build_cond_empty (t); 3071 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3072 } 3073 3074 /* Remove GIMPLE_OMP_CONTINUE. */ 3075 gsi_remove (&gsi, true); 3076 3077 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3078 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); 3079 3080 /* Emit code to get the next parallel iteration in L2_BB. */ 3081 gsi = gsi_start_bb (l2_bb); 3082 3083 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 3084 build_fold_addr_expr (istart0), 3085 build_fold_addr_expr (iend0)); 3086 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3087 false, GSI_CONTINUE_LINKING); 3088 if (TREE_TYPE (t) != boolean_type_node) 3089 t = fold_build2 (NE_EXPR, boolean_type_node, 3090 t, build_int_cst (TREE_TYPE (t), 0)); 3091 gcond *cond_stmt = gimple_build_cond_empty (t); 3092 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 3093 } 3094 3095 /* Add the loop cleanup function. 
*/ 3096 gsi = gsi_last_bb (exit_bb); 3097 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3098 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 3099 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3100 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3101 else 3102 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3103 gcall *call_stmt = gimple_build_call (t, 0); 3104 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3105 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 3106 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 3107 if (fd->ordered) 3108 { 3109 tree arr = counts[fd->ordered]; 3110 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 3111 TREE_THIS_VOLATILE (clobber) = 1; 3112 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 3113 GSI_SAME_STMT); 3114 } 3115 gsi_remove (&gsi, true); 3116 3117 /* Connect the new blocks. */ 3118 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 3119 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 3120 3121 if (!broken_loop) 3122 { 3123 gimple_seq phis; 3124 3125 e = find_edge (cont_bb, l3_bb); 3126 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 3127 3128 phis = phi_nodes (l3_bb); 3129 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 3130 { 3131 gimple *phi = gsi_stmt (gsi); 3132 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 3133 PHI_ARG_DEF_FROM_EDGE (phi, e)); 3134 } 3135 remove_edge (e); 3136 3137 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 3138 e = find_edge (cont_bb, l1_bb); 3139 if (e == NULL) 3140 { 3141 e = BRANCH_EDGE (cont_bb); 3142 gcc_assert (single_succ (e->dest) == l1_bb); 3143 } 3144 if (gimple_omp_for_combined_p (fd->for_stmt)) 3145 { 3146 remove_edge (e); 3147 e = NULL; 3148 } 3149 else if (fd->collapse > 1) 3150 { 3151 remove_edge (e); 3152 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3153 } 3154 else 3155 e->flags = EDGE_TRUE_VALUE; 3156 if (e) 3157 { 3158 e->probability = REG_BR_PROB_BASE * 7 / 8; 3159 
find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; 3160 } 3161 else 3162 { 3163 e = find_edge (cont_bb, l2_bb); 3164 e->flags = EDGE_FALLTHRU; 3165 } 3166 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 3167 3168 if (gimple_in_ssa_p (cfun)) 3169 { 3170 /* Add phis to the outer loop that connect to the phis in the inner, 3171 original loop, and move the loop entry value of the inner phi to 3172 the loop entry value of the outer phi. */ 3173 gphi_iterator psi; 3174 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 3175 { 3176 source_location locus; 3177 gphi *nphi; 3178 gphi *exit_phi = psi.phi (); 3179 3180 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 3181 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 3182 3183 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 3184 edge latch_to_l1 = find_edge (latch, l1_bb); 3185 gphi *inner_phi 3186 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 3187 3188 tree t = gimple_phi_result (exit_phi); 3189 tree new_res = copy_ssa_name (t, NULL); 3190 nphi = create_phi_node (new_res, l0_bb); 3191 3192 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 3193 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 3194 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 3195 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 3196 add_phi_arg (nphi, t, entry_to_l0, locus); 3197 3198 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 3199 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 3200 3201 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 3202 }; 3203 } 3204 3205 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 3206 recompute_dominator (CDI_DOMINATORS, l2_bb)); 3207 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 3208 recompute_dominator (CDI_DOMINATORS, l3_bb)); 3209 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 3210 recompute_dominator (CDI_DOMINATORS, l0_bb)); 3211 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 3212 recompute_dominator (CDI_DOMINATORS, l1_bb)); 3213 3214 /* We enter 
expand_omp_for_generic with a loop. This original loop may 3215 have its own loop struct, or it may be part of an outer loop struct 3216 (which may be the fake loop). */ 3217 struct loop *outer_loop = entry_bb->loop_father; 3218 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 3219 3220 add_bb_to_loop (l2_bb, outer_loop); 3221 3222 /* We've added a new loop around the original loop. Allocate the 3223 corresponding loop struct. */ 3224 struct loop *new_loop = alloc_loop (); 3225 new_loop->header = l0_bb; 3226 new_loop->latch = l2_bb; 3227 add_loop (new_loop, outer_loop); 3228 3229 /* Allocate a loop structure for the original loop unless we already 3230 had one. */ 3231 if (!orig_loop_has_loop_struct 3232 && !gimple_omp_for_combined_p (fd->for_stmt)) 3233 { 3234 struct loop *orig_loop = alloc_loop (); 3235 orig_loop->header = l1_bb; 3236 /* The loop may have multiple latches. */ 3237 add_loop (orig_loop, new_loop); 3238 } 3239 } 3240} 3241 3242/* A subroutine of expand_omp_for. Generate code for a parallel 3243 loop with static schedule and no specified chunk size. 
Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  /* All arithmetic on the iteration space is done in ITYPE; for a
     pointer iteration variable that is the corresponding signed
     integer type.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* Pick the relevant basic blocks out of the region and sanity-check
     the expected CFG shape.  BROKEN_LOOP means there is no
     GIMPLE_OMP_CONTINUE, i.e. the loop's latch never executes.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      /* Collapsed loops: materialize the per-loop iteration counts
	 first; T stays NULL so the zero-iteration guard below is
	 skipped (expand_omp_for_init_counts handles that itself).  */
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      /* The loop may execute zero times and TYPE is unsigned: emit a
	 runtime "N1 cond N2" check and branch straight to FIN_BB when
	 it fails, so the partitioning arithmetic below only runs for
	 non-empty iteration spaces.  */
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = REG_BR_PROB_BASE / 2000 - 1;
      if (gimple_in_ssa_p (cfun))
	{
	  /* The new edge into FIN_BB needs arguments on FIN_BB's PHIs;
	     reuse the values already flowing in from ENTRY_BB.  */
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  /* Partition among threads for a worksharing loop, among teams for
     a distribute construct.  */
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  /* For a loop nested in a combined construct, the bounds come from
     the _looptemp_ clauses set up by the outer construct rather than
     from the loop itself.  */
  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  /* Compute the total iteration count
	n = (adj + N2 - N1) / STEP
     as in the pseudocode above; negate both operands of the division
     when ITYPE is unsigned and the loop counts down, to keep the
     truncating division correct.  */
  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* q = n / nthreads; tt = n % nthreads.  The first TT threads get
     one extra iteration each (see the threadid < tt branch below).  */
  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  /* SECOND_BB holds the "tt = 0; q = q + 1" adjustment taken only by
     threads with THREADID < TT (label L3 in the pseudocode).  */
  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  /* THIRD_BB computes this thread's chunk [s0, e0) and tests whether
     it is empty (label L4 in the pseudocode).  */
  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* For a combined construct the computed bounds are communicated
	 to the inner construct through its _looptemp_ clauses instead
	 of being assigned to the loop variable directly.  */
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  /* STARTVAR = s0 * STEP + N1 (pointer arithmetic when V is a
     pointer).  */
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  /* E = e0 * STEP + N1, the first iteration past this thread's
     chunk.  */
  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      /* Compute the logical iteration number of S0 once, lazily,
		 shared by all linear clauses.  For a combined construct
		 the bounds have already been adjusted, so translate N1
		 back to an iteration count first.  */
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* V += STEP; if (V cond e) goto body.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = REG_BR_PROB_BASE / 4 * 3;
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = REG_BR_PROB_BASE / 4;
  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* The inner construct iterates; no back edge here.  */
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* The back edge goes through the block updating the
	     collapsed loops' individual iteration variables.  */
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));

  struct loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      /* The loop body already has its own loop struct (e.g. created
	 by an earlier pass); just sanity-check it and return.  */
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Allocate a loop structure for the sequential loop we created.
	 With collapse > 1 the latch is not unique, so leave it NULL.  */
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in E->DEST with ARG on edge
*/ 3704 3705static gphi * 3706find_phi_with_arg_on_edge (tree arg, edge e) 3707{ 3708 basic_block bb = e->dest; 3709 3710 for (gphi_iterator gpi = gsi_start_phis (bb); 3711 !gsi_end_p (gpi); 3712 gsi_next (&gpi)) 3713 { 3714 gphi *phi = gpi.phi (); 3715 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) 3716 return phi; 3717 } 3718 3719 return NULL; 3720} 3721 3722/* A subroutine of expand_omp_for. Generate code for a parallel 3723 loop with static schedule and a specified chunk size. Given 3724 parameters: 3725 3726 for (V = N1; V cond N2; V += STEP) BODY; 3727 3728 where COND is "<" or ">", we generate pseudocode 3729 3730 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3731 if (cond is <) 3732 adj = STEP - 1; 3733 else 3734 adj = STEP + 1; 3735 if ((__typeof (V)) -1 > 0 && cond is >) 3736 n = -(adj + N2 - N1) / -STEP; 3737 else 3738 n = (adj + N2 - N1) / STEP; 3739 trip = 0; 3740 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is 3741 here so that V is defined 3742 if the loop is not entered 3743 L0: 3744 s0 = (trip * nthreads + threadid) * CHUNK; 3745 e0 = min (s0 + CHUNK, n); 3746 if (s0 < n) goto L1; else goto L4; 3747 L1: 3748 V = s0 * STEP + N1; 3749 e = e0 * STEP + N1; 3750 L2: 3751 BODY; 3752 V += STEP; 3753 if (V cond e) goto L2; else goto L3; 3754 L3: 3755 trip += 1; 3756 goto L0; 3757 L4: 3758*/ 3759 3760static void 3761expand_omp_for_static_chunk (struct omp_region *region, 3762 struct omp_for_data *fd, gimple *inner_stmt) 3763{ 3764 tree n, s0, e0, e, t; 3765 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; 3766 tree type, itype, vmain, vback, vextra; 3767 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; 3768 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; 3769 gimple_stmt_iterator gsi; 3770 edge se; 3771 bool broken_loop = region->cont == NULL; 3772 tree *counts = NULL; 3773 tree n1, n2, step; 3774 3775 itype = type = TREE_TYPE (fd->loop.v); 3776 if (POINTER_TYPE_P (type)) 3777 
itype = signed_type_for (type); 3778 3779 entry_bb = region->entry; 3780 se = split_block (entry_bb, last_stmt (entry_bb)); 3781 entry_bb = se->src; 3782 iter_part_bb = se->dest; 3783 cont_bb = region->cont; 3784 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 3785 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 3786 gcc_assert (broken_loop 3787 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 3788 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 3789 body_bb = single_succ (seq_start_bb); 3790 if (!broken_loop) 3791 { 3792 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3793 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3794 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3795 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 3796 } 3797 exit_bb = region->exit; 3798 3799 /* Trip and adjustment setup goes in ENTRY_BB. */ 3800 gsi = gsi_last_bb (entry_bb); 3801 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3802 3803 if (fd->collapse > 1) 3804 { 3805 int first_zero_iter = -1, dummy = -1; 3806 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3807 3808 counts = XALLOCAVEC (tree, fd->collapse); 3809 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3810 fin_bb, first_zero_iter, 3811 dummy_bb, dummy, l2_dom_bb); 3812 t = NULL_TREE; 3813 } 3814 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3815 t = integer_one_node; 3816 else 3817 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3818 fold_convert (type, fd->loop.n1), 3819 fold_convert (type, fd->loop.n2)); 3820 if (fd->collapse == 1 3821 && TYPE_UNSIGNED (type) 3822 && (t == NULL_TREE || !integer_onep (t))) 3823 { 3824 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3825 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3826 true, GSI_SAME_STMT); 3827 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3828 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3829 true, GSI_SAME_STMT); 3830 gcond *cond_stmt = gimple_build_cond 
(fd->loop.cond_code, n1, n2, 3831 NULL_TREE, NULL_TREE); 3832 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3833 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3834 expand_omp_regimplify_p, NULL, NULL) 3835 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3836 expand_omp_regimplify_p, NULL, NULL)) 3837 { 3838 gsi = gsi_for_stmt (cond_stmt); 3839 gimple_regimplify_operands (cond_stmt, &gsi); 3840 } 3841 se = split_block (entry_bb, cond_stmt); 3842 se->flags = EDGE_TRUE_VALUE; 3843 entry_bb = se->dest; 3844 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); 3845 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 3846 se->probability = REG_BR_PROB_BASE / 2000 - 1; 3847 if (gimple_in_ssa_p (cfun)) 3848 { 3849 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 3850 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3851 !gsi_end_p (gpi); gsi_next (&gpi)) 3852 { 3853 gphi *phi = gpi.phi (); 3854 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3855 se, UNKNOWN_LOCATION); 3856 } 3857 } 3858 gsi = gsi_last_bb (entry_bb); 3859 } 3860 3861 switch (gimple_omp_for_kind (fd->for_stmt)) 3862 { 3863 case GF_OMP_FOR_KIND_FOR: 3864 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3865 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3866 break; 3867 case GF_OMP_FOR_KIND_DISTRIBUTE: 3868 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 3869 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3870 break; 3871 default: 3872 gcc_unreachable (); 3873 } 3874 nthreads = build_call_expr (nthreads, 0); 3875 nthreads = fold_convert (itype, nthreads); 3876 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3877 true, GSI_SAME_STMT); 3878 threadid = build_call_expr (threadid, 0); 3879 threadid = fold_convert (itype, threadid); 3880 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3881 true, GSI_SAME_STMT); 3882 3883 n1 = fd->loop.n1; 3884 n2 = fd->loop.n2; 3885 step 
= fd->loop.step; 3886 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3887 { 3888 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3889 OMP_CLAUSE__LOOPTEMP_); 3890 gcc_assert (innerc); 3891 n1 = OMP_CLAUSE_DECL (innerc); 3892 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3893 OMP_CLAUSE__LOOPTEMP_); 3894 gcc_assert (innerc); 3895 n2 = OMP_CLAUSE_DECL (innerc); 3896 } 3897 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3898 true, NULL_TREE, true, GSI_SAME_STMT); 3899 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3900 true, NULL_TREE, true, GSI_SAME_STMT); 3901 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3902 true, NULL_TREE, true, GSI_SAME_STMT); 3903 tree chunk_size = fold_convert (itype, fd->chunk_size); 3904 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 3905 chunk_size 3906 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, 3907 GSI_SAME_STMT); 3908 3909 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); 3910 t = fold_build2 (PLUS_EXPR, itype, step, t); 3911 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3912 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3913 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3914 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3915 fold_build1 (NEGATE_EXPR, itype, t), 3916 fold_build1 (NEGATE_EXPR, itype, step)); 3917 else 3918 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3919 t = fold_convert (itype, t); 3920 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3921 true, GSI_SAME_STMT); 3922 3923 trip_var = create_tmp_reg (itype, ".trip"); 3924 if (gimple_in_ssa_p (cfun)) 3925 { 3926 trip_init = make_ssa_name (trip_var); 3927 trip_main = make_ssa_name (trip_var); 3928 trip_back = make_ssa_name (trip_var); 3929 } 3930 else 3931 { 3932 trip_init = trip_var; 3933 trip_main = trip_var; 3934 trip_back = trip_var; 3935 } 3936 3937 gassign *assign_stmt 3938 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 3939 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3940 3941 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 3942 t = fold_build2 (MULT_EXPR, itype, t, step); 3943 if (POINTER_TYPE_P (type)) 3944 t = fold_build_pointer_plus (n1, t); 3945 else 3946 t = fold_build2 (PLUS_EXPR, type, t, n1); 3947 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3948 true, GSI_SAME_STMT); 3949 3950 /* Remove the GIMPLE_OMP_FOR. */ 3951 gsi_remove (&gsi, true); 3952 3953 gimple_stmt_iterator gsif = gsi; 3954 3955 /* Iteration space partitioning goes in ITER_PART_BB. 
*/ 3956 gsi = gsi_last_bb (iter_part_bb); 3957 3958 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 3959 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 3960 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 3961 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3962 false, GSI_CONTINUE_LINKING); 3963 3964 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 3965 t = fold_build2 (MIN_EXPR, itype, t, n); 3966 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3967 false, GSI_CONTINUE_LINKING); 3968 3969 t = build2 (LT_EXPR, boolean_type_node, s0, n); 3970 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 3971 3972 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 3973 gsi = gsi_start_bb (seq_start_bb); 3974 3975 tree startvar = fd->loop.v; 3976 tree endvar = NULL_TREE; 3977 3978 if (gimple_omp_for_combined_p (fd->for_stmt)) 3979 { 3980 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 3981 ? gimple_omp_parallel_clauses (inner_stmt) 3982 : gimple_omp_for_clauses (inner_stmt); 3983 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 3984 gcc_assert (innerc); 3985 startvar = OMP_CLAUSE_DECL (innerc); 3986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3987 OMP_CLAUSE__LOOPTEMP_); 3988 gcc_assert (innerc); 3989 endvar = OMP_CLAUSE_DECL (innerc); 3990 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 3991 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 3992 { 3993 int i; 3994 for (i = 1; i < fd->collapse; i++) 3995 { 3996 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3997 OMP_CLAUSE__LOOPTEMP_); 3998 gcc_assert (innerc); 3999 } 4000 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4001 OMP_CLAUSE__LOOPTEMP_); 4002 if (innerc) 4003 { 4004 /* If needed (distribute parallel for with lastprivate), 4005 propagate down the total number of iterations. 
*/ 4006 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4007 fd->loop.n2); 4008 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4009 GSI_CONTINUE_LINKING); 4010 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4011 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4012 } 4013 } 4014 } 4015 4016 t = fold_convert (itype, s0); 4017 t = fold_build2 (MULT_EXPR, itype, t, step); 4018 if (POINTER_TYPE_P (type)) 4019 t = fold_build_pointer_plus (n1, t); 4020 else 4021 t = fold_build2 (PLUS_EXPR, type, t, n1); 4022 t = fold_convert (TREE_TYPE (startvar), t); 4023 t = force_gimple_operand_gsi (&gsi, t, 4024 DECL_P (startvar) 4025 && TREE_ADDRESSABLE (startvar), 4026 NULL_TREE, false, GSI_CONTINUE_LINKING); 4027 assign_stmt = gimple_build_assign (startvar, t); 4028 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4029 4030 t = fold_convert (itype, e0); 4031 t = fold_build2 (MULT_EXPR, itype, t, step); 4032 if (POINTER_TYPE_P (type)) 4033 t = fold_build_pointer_plus (n1, t); 4034 else 4035 t = fold_build2 (PLUS_EXPR, type, t, n1); 4036 t = fold_convert (TREE_TYPE (startvar), t); 4037 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4038 false, GSI_CONTINUE_LINKING); 4039 if (endvar) 4040 { 4041 assign_stmt = gimple_build_assign (endvar, e); 4042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4043 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4044 assign_stmt = gimple_build_assign (fd->loop.v, e); 4045 else 4046 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4047 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4048 } 4049 /* Handle linear clause adjustments. 
*/ 4050 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 4051 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4052 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4053 c; c = OMP_CLAUSE_CHAIN (c)) 4054 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4055 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4056 { 4057 tree d = OMP_CLAUSE_DECL (c); 4058 bool is_ref = omp_is_reference (d); 4059 tree t = d, a, dest; 4060 if (is_ref) 4061 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4062 tree type = TREE_TYPE (t); 4063 if (POINTER_TYPE_P (type)) 4064 type = sizetype; 4065 dest = unshare_expr (t); 4066 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4067 expand_omp_build_assign (&gsif, v, t); 4068 if (itercnt == NULL_TREE) 4069 { 4070 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4071 { 4072 itercntbias 4073 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 4074 fold_convert (itype, fd->loop.n1)); 4075 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 4076 itercntbias, step); 4077 itercntbias 4078 = force_gimple_operand_gsi (&gsif, itercntbias, true, 4079 NULL_TREE, true, 4080 GSI_SAME_STMT); 4081 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 4082 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4083 NULL_TREE, false, 4084 GSI_CONTINUE_LINKING); 4085 } 4086 else 4087 itercnt = s0; 4088 } 4089 a = fold_build2 (MULT_EXPR, type, 4090 fold_convert (type, itercnt), 4091 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4092 t = fold_build2 (type == TREE_TYPE (t) ? 
PLUS_EXPR 4093 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4094 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4095 false, GSI_CONTINUE_LINKING); 4096 assign_stmt = gimple_build_assign (dest, t); 4097 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4098 } 4099 if (fd->collapse > 1) 4100 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4101 4102 if (!broken_loop) 4103 { 4104 /* The code controlling the sequential loop goes in CONT_BB, 4105 replacing the GIMPLE_OMP_CONTINUE. */ 4106 gsi = gsi_last_bb (cont_bb); 4107 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4108 vmain = gimple_omp_continue_control_use (cont_stmt); 4109 vback = gimple_omp_continue_control_def (cont_stmt); 4110 4111 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4112 { 4113 if (POINTER_TYPE_P (type)) 4114 t = fold_build_pointer_plus (vmain, step); 4115 else 4116 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4117 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 4118 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4119 true, GSI_SAME_STMT); 4120 assign_stmt = gimple_build_assign (vback, t); 4121 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4122 4123 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 4124 t = build2 (EQ_EXPR, boolean_type_node, 4125 build_int_cst (itype, 0), 4126 build_int_cst (itype, 1)); 4127 else 4128 t = build2 (fd->loop.cond_code, boolean_type_node, 4129 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4130 ? t : vback, e); 4131 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4132 } 4133 4134 /* Remove GIMPLE_OMP_CONTINUE. */ 4135 gsi_remove (&gsi, true); 4136 4137 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4138 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4139 4140 /* Trip update code goes into TRIP_UPDATE_BB. 
*/ 4141 gsi = gsi_start_bb (trip_update_bb); 4142 4143 t = build_int_cst (itype, 1); 4144 t = build2 (PLUS_EXPR, itype, trip_main, t); 4145 assign_stmt = gimple_build_assign (trip_back, t); 4146 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4147 } 4148 4149 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4150 gsi = gsi_last_bb (exit_bb); 4151 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4152 { 4153 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4154 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4155 } 4156 gsi_remove (&gsi, true); 4157 4158 /* Connect the new blocks. */ 4159 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 4160 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 4161 4162 if (!broken_loop) 4163 { 4164 se = find_edge (cont_bb, body_bb); 4165 if (se == NULL) 4166 { 4167 se = BRANCH_EDGE (cont_bb); 4168 gcc_assert (single_succ (se->dest) == body_bb); 4169 } 4170 if (gimple_omp_for_combined_p (fd->for_stmt)) 4171 { 4172 remove_edge (se); 4173 se = NULL; 4174 } 4175 else if (fd->collapse > 1) 4176 { 4177 remove_edge (se); 4178 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4179 } 4180 else 4181 se->flags = EDGE_TRUE_VALUE; 4182 find_edge (cont_bb, trip_update_bb)->flags 4183 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 4184 4185 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 4186 iter_part_bb); 4187 } 4188 4189 if (gimple_in_ssa_p (cfun)) 4190 { 4191 gphi_iterator psi; 4192 gphi *phi; 4193 edge re, ene; 4194 edge_var_map *vm; 4195 size_t i; 4196 4197 gcc_assert (fd->collapse == 1 && !broken_loop); 4198 4199 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 4200 remove arguments of the phi nodes in fin_bb. We need to create 4201 appropriate phi nodes in iter_part_bb instead. 
*/ 4202 se = find_edge (iter_part_bb, fin_bb); 4203 re = single_succ_edge (trip_update_bb); 4204 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 4205 ene = single_succ_edge (entry_bb); 4206 4207 psi = gsi_start_phis (fin_bb); 4208 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 4209 gsi_next (&psi), ++i) 4210 { 4211 gphi *nphi; 4212 source_location locus; 4213 4214 phi = psi.phi (); 4215 t = gimple_phi_result (phi); 4216 gcc_assert (t == redirect_edge_var_map_result (vm)); 4217 4218 if (!single_pred_p (fin_bb)) 4219 t = copy_ssa_name (t, phi); 4220 4221 nphi = create_phi_node (t, iter_part_bb); 4222 4223 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 4224 locus = gimple_phi_arg_location_from_edge (phi, se); 4225 4226 /* A special case -- fd->loop.v is not yet computed in 4227 iter_part_bb, we need to use vextra instead. */ 4228 if (t == fd->loop.v) 4229 t = vextra; 4230 add_phi_arg (nphi, t, ene, locus); 4231 locus = redirect_edge_var_map_location (vm); 4232 tree back_arg = redirect_edge_var_map_def (vm); 4233 add_phi_arg (nphi, back_arg, re, locus); 4234 edge ce = find_edge (cont_bb, body_bb); 4235 if (ce == NULL) 4236 { 4237 ce = BRANCH_EDGE (cont_bb); 4238 gcc_assert (single_succ (ce->dest) == body_bb); 4239 ce = single_succ_edge (ce->dest); 4240 } 4241 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 4242 gcc_assert (inner_loop_phi != NULL); 4243 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 4244 find_edge (seq_start_bb, body_bb), locus); 4245 4246 if (!single_pred_p (fin_bb)) 4247 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 4248 } 4249 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 4250 redirect_edge_var_map_clear (re); 4251 if (single_pred_p (fin_bb)) 4252 while (1) 4253 { 4254 psi = gsi_start_phis (fin_bb); 4255 if (gsi_end_p (psi)) 4256 break; 4257 remove_phi_node (&psi, false); 4258 } 4259 4260 /* Make phi node for trip. 
*/ 4261 phi = create_phi_node (trip_main, iter_part_bb); 4262 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 4263 UNKNOWN_LOCATION); 4264 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 4265 UNKNOWN_LOCATION); 4266 } 4267 4268 if (!broken_loop) 4269 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 4270 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 4271 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 4272 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 4273 recompute_dominator (CDI_DOMINATORS, fin_bb)); 4274 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 4275 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 4276 set_immediate_dominator (CDI_DOMINATORS, body_bb, 4277 recompute_dominator (CDI_DOMINATORS, body_bb)); 4278 4279 if (!broken_loop) 4280 { 4281 struct loop *loop = body_bb->loop_father; 4282 struct loop *trip_loop = alloc_loop (); 4283 trip_loop->header = iter_part_bb; 4284 trip_loop->latch = trip_update_bb; 4285 add_loop (trip_loop, iter_part_bb->loop_father); 4286 4287 if (loop != entry_bb->loop_father) 4288 { 4289 gcc_assert (loop->header == body_bb); 4290 gcc_assert (loop->latch == region->cont 4291 || single_pred (loop->latch) == region->cont); 4292 trip_loop->inner = loop; 4293 return; 4294 } 4295 4296 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4297 { 4298 loop = alloc_loop (); 4299 loop->header = body_bb; 4300 if (collapse_bb == NULL) 4301 loop->latch = cont_bb; 4302 add_loop (loop, trip_loop); 4303 } 4304 } 4305} 4306 4307/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop. 4308 Given parameters: 4309 for (V = N1; V cond N2; V += STEP) BODY; 4310 4311 where COND is "<" or ">" or "!=", we generate pseudocode 4312 4313 for (ind_var = low; ind_var < high; ind_var++) 4314 { 4315 V = n1 + (ind_var * STEP) 4316 4317 <BODY> 4318 } 4319 4320 In the above pseudocode, low and high are function parameters of the 4321 child function. 
In the function below, we are inserting a temp. 4322 variable that will be making a call to two OMP functions that will not be 4323 found in the body of _Cilk_for (since OMP_FOR cannot be mixed 4324 with _Cilk_for). These functions are replaced with low and high 4325 by the function that handles taskreg. */ 4326 4327 /* Expand the _Cilk_for REGION described by FD: replace the GIMPLE_OMP_FOR, GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN markers with an explicit induction-variable loop, rewire the CFG accordingly, and record in REGION->ws_args which __libcilkrts entry point to call. */ 4328static void 4329expand_cilk_for (struct omp_region *region, struct omp_for_data *fd) 4330{ 4331 bool broken_loop = region->cont == NULL; /* No GIMPLE_OMP_CONTINUE block (see struct omp_region). */ 4332 basic_block entry_bb = region->entry; 4333 basic_block cont_bb = region->cont; 4334 4335 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4336 gcc_assert (broken_loop 4337 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4338 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 4339 basic_block l1_bb, l2_bb; 4340 4341 if (!broken_loop) 4342 { 4343 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 4344 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4345 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 4346 l2_bb = BRANCH_EDGE (entry_bb)->dest; 4347 } 4348 else 4349 { 4350 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 4351 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 4352 l2_bb = single_succ (l1_bb); 4353 } 4354 basic_block exit_bb = region->exit; 4355 basic_block l2_dom_bb = NULL; 4356 4357 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb); 4358 4359 /* Below statements until the "tree high_val = ..." are pseudo statements 4360 used to pass information to be used by expand_omp_taskreg. 4361 low_val and high_val will be replaced by the __low and __high 4362 parameter from the child function. 4363 4364 The call_exprs part is a place-holder, it is mainly used 4365 to distinctly identify to the top-level part that this is 4366 where we should put low and high (reasoning given in header 4367 comment.
*/ 4368 4369 gomp_parallel *par_stmt 4370 = as_a <gomp_parallel *> (last_stmt (region->outer->entry)); 4371 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt); 4372 tree t, low_val = NULL_TREE, high_val = NULL_TREE; /* Locate the __low/__high arguments that taskreg handling added to the child function. */ 4373 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t)) 4374 { 4375 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high")) 4376 high_val = t; 4377 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low")) 4378 low_val = t; 4379 } 4380 gcc_assert (low_val && high_val); 4381 4382 tree type = TREE_TYPE (low_val); 4383 tree ind_var = create_tmp_reg (type, "__cilk_ind_var"); 4384 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4385 4386 /* Not needed in SSA form right now. */ 4387 gcc_assert (!gimple_in_ssa_p (cfun)); 4388 if (l2_dom_bb == NULL) 4389 l2_dom_bb = l1_bb; 4390 4391 tree n1 = low_val; 4392 tree n2 = high_val; 4393 4394 gimple *stmt = gimple_build_assign (ind_var, n1); 4395 4396 /* Replace the GIMPLE_OMP_FOR statement. */ 4397 gsi_replace (&gsi, stmt, true); 4398 4399 if (!broken_loop) 4400 { 4401 /* Code to control the increment goes in the CONT_BB. */ 4402 gsi = gsi_last_bb (cont_bb); 4403 stmt = gsi_stmt (gsi); 4404 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 4405 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var, 4406 build_one_cst (type)); 4407 4408 /* Replace GIMPLE_OMP_CONTINUE. */ 4409 gsi_replace (&gsi, stmt, true); 4410 } 4411 4412 /* Emit the condition in L1_BB.
*/ 4413 gsi = gsi_after_labels (l1_bb); 4414 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step), 4415 fold_convert (TREE_TYPE (fd->loop.step), ind_var), 4416 fd->loop.step); 4417 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1))) 4418 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1), 4419 fd->loop.n1, fold_convert (sizetype, t)); 4420 else 4421 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1), 4422 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t)); 4423 t = fold_convert (TREE_TYPE (fd->loop.v), t); 4424 expand_omp_build_assign (&gsi, fd->loop.v, t); 4425 4426 /* The condition is always '<' since the runtime will fill in the low 4427 and high values. */ 4428 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE); 4429 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4430 4431 /* Remove GIMPLE_OMP_RETURN. */ 4432 gsi = gsi_last_bb (exit_bb); 4433 gsi_remove (&gsi, true); 4434 4435 /* Connect the new blocks. */ 4436 remove_edge (FALLTHRU_EDGE (entry_bb)); 4437 4438 edge e, ne; 4439 if (!broken_loop) 4440 { 4441 remove_edge (BRANCH_EDGE (entry_bb)); 4442 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 4443 4444 e = BRANCH_EDGE (l1_bb); 4445 ne = FALLTHRU_EDGE (l1_bb); 4446 e->flags = EDGE_TRUE_VALUE; 4447 } 4448 else 4449 { 4450 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 4451 4452 ne = single_succ_edge (l1_bb); 4453 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 4454 4455 } 4456 ne->flags = EDGE_FALSE_VALUE; 4457 e->probability = REG_BR_PROB_BASE * 7 / 8; 4458 ne->probability = REG_BR_PROB_BASE / 8; /* Back edge predicted taken 7 out of 8 times. */ 4459 4460 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 4461 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 4462 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 4463 4464 if (!broken_loop) 4465 { 4466 struct loop *loop = alloc_loop (); 4467 loop->header = l1_bb; 4468 loop->latch = cont_bb; 4469 add_loop (loop, l1_bb->loop_father); 4470 loop->safelen = INT_MAX; 4471 } 4472 4473 /* Pick the correct library
function based on the precision of the 4474 induction variable type. */ 4475 tree lib_fun = NULL_TREE; 4476 if (TYPE_PRECISION (type) == 32) 4477 lib_fun = cilk_for_32_fndecl; 4478 else if (TYPE_PRECISION (type) == 64) 4479 lib_fun = cilk_for_64_fndecl; 4480 else 4481 gcc_unreachable (); 4482 4483 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR); 4484 4485 /* WS_ARGS contains the library function flavor to call: 4486 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the 4487 user-defined grain value. If the user does not define one, then zero 4488 is passed in by the parser. */ 4489 vec_alloc (region->ws_args, 2); 4490 region->ws_args->quick_push (lib_fun); 4491 region->ws_args->quick_push (fd->chunk_size); 4492} 4493 4494/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 4495 loop. Given parameters: 4496 4497 for (V = N1; V cond N2; V += STEP) BODY; 4498 4499 where COND is "<" or ">", we generate pseudocode 4500 4501 V = N1; 4502 goto L1; 4503 L0: 4504 BODY; 4505 V += STEP; 4506 L1: 4507 if (V cond N2) goto L0; else goto L2; 4508 L2: 4509 4510 For collapsed loops, given parameters: 4511 collapse(3) 4512 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 4513 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 4514 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 4515 BODY; 4516 4517 we generate pseudocode 4518 4519 if (cond3 is <) 4520 adj = STEP3 - 1; 4521 else 4522 adj = STEP3 + 1; 4523 count3 = (adj + N32 - N31) / STEP3; 4524 if (cond2 is <) 4525 adj = STEP2 - 1; 4526 else 4527 adj = STEP2 + 1; 4528 count2 = (adj + N22 - N21) / STEP2; 4529 if (cond1 is <) 4530 adj = STEP1 - 1; 4531 else 4532 adj = STEP1 + 1; 4533 count1 = (adj + N12 - N11) / STEP1; 4534 count = count1 * count2 * count3; 4535 V = 0; 4536 V1 = N11; 4537 V2 = N21; 4538 V3 = N31; 4539 goto L1; 4540 L0: 4541 BODY; 4542 V += 1; 4543 V3 += STEP3; 4544 V2 += (V3 cond3 N32) ? 0 : STEP2; 4545 V3 = (V3 cond3 N32) ? V3 : N31; 4546 V1 += (V2 cond2 N22) ?
0 : STEP1; 4547 V2 = (V2 cond2 N22) ? V2 : N21; 4548 L1: 4549 if (V < count) goto L0; else goto L2; 4550 L2: 4551 4552 */ 4553 4554static void 4555expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) 4556{ 4557 tree type, t; 4558 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; 4559 gimple_stmt_iterator gsi; 4560 gimple *stmt; 4561 gcond *cond_stmt; 4562 bool broken_loop = region->cont == NULL; 4563 edge e, ne; 4564 tree *counts = NULL; 4565 int i; 4566 int safelen_int = INT_MAX; 4567 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4568 OMP_CLAUSE_SAFELEN); 4569 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4570 OMP_CLAUSE__SIMDUID_); 4571 tree n1, n2; 4572 4573 if (safelen) 4574 { 4575 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); 4576 if (TREE_CODE (safelen) != INTEGER_CST) 4577 safelen_int = 0; 4578 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) 4579 safelen_int = tree_to_uhwi (safelen); 4580 if (safelen_int == 1) 4581 safelen_int = 0; 4582 } 4583 type = TREE_TYPE (fd->loop.v); 4584 entry_bb = region->entry; 4585 cont_bb = region->cont; 4586 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4587 gcc_assert (broken_loop 4588 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4589 l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 4590 if (!broken_loop) 4591 { 4592 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 4593 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4594 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 4595 l2_bb = BRANCH_EDGE (entry_bb)->dest; 4596 } 4597 else 4598 { 4599 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 4600 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 4601 l2_bb = single_succ (l1_bb); 4602 } 4603 exit_bb = region->exit; 4604 l2_dom_bb = NULL; 4605 4606 gsi = gsi_last_bb (entry_bb); 4607 4608 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4609 /* Not needed in SSA form right now. 
*/ 4610 gcc_assert (!gimple_in_ssa_p (cfun)); 4611 if (fd->collapse > 1) 4612 { 4613 int first_zero_iter = -1, dummy = -1; 4614 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; 4615 4616 counts = XALLOCAVEC (tree, fd->collapse); 4617 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4618 zero_iter_bb, first_zero_iter, 4619 dummy_bb, dummy, l2_dom_bb); 4620 } 4621 if (l2_dom_bb == NULL) 4622 l2_dom_bb = l1_bb; 4623 4624 n1 = fd->loop.n1; 4625 n2 = fd->loop.n2; 4626 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4627 { 4628 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4629 OMP_CLAUSE__LOOPTEMP_); 4630 gcc_assert (innerc); 4631 n1 = OMP_CLAUSE_DECL (innerc); 4632 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4633 OMP_CLAUSE__LOOPTEMP_); 4634 gcc_assert (innerc); 4635 n2 = OMP_CLAUSE_DECL (innerc); 4636 } 4637 tree step = fd->loop.step; 4638 4639 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4640 OMP_CLAUSE__SIMT_); 4641 if (is_simt) 4642 { 4643 cfun->curr_properties &= ~PROP_gimple_lomp_dev; 4644 is_simt = safelen_int > 1; 4645 } 4646 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; 4647 if (is_simt) 4648 { 4649 simt_lane = create_tmp_var (unsigned_type_node); 4650 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); 4651 gimple_call_set_lhs (g, simt_lane); 4652 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4653 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, 4654 fold_convert (TREE_TYPE (step), simt_lane)); 4655 n1 = fold_convert (type, n1); 4656 if (POINTER_TYPE_P (type)) 4657 n1 = fold_build_pointer_plus (n1, offset); 4658 else 4659 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); 4660 4661 /* Collapsed loops not handled for SIMT yet: limit to one lane only. 
*/ 4662 if (fd->collapse > 1) 4663 simt_maxlane = build_one_cst (unsigned_type_node); 4664 else if (safelen_int < omp_max_simt_vf ()) 4665 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); 4666 tree vf 4667 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, 4668 unsigned_type_node, 0); 4669 if (simt_maxlane) 4670 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); 4671 vf = fold_convert (TREE_TYPE (step), vf); 4672 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); 4673 } 4674 4675 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 4676 if (fd->collapse > 1) 4677 { 4678 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4679 { 4680 gsi_prev (&gsi); 4681 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); 4682 gsi_next (&gsi); 4683 } 4684 else 4685 for (i = 0; i < fd->collapse; i++) 4686 { 4687 tree itype = TREE_TYPE (fd->loops[i].v); 4688 if (POINTER_TYPE_P (itype)) 4689 itype = signed_type_for (itype); 4690 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); 4691 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4692 } 4693 } 4694 4695 /* Remove the GIMPLE_OMP_FOR statement. */ 4696 gsi_remove (&gsi, true); 4697 4698 if (!broken_loop) 4699 { 4700 /* Code to control the increment goes in the CONT_BB. 
*/ 4701 gsi = gsi_last_bb (cont_bb); 4702 stmt = gsi_stmt (gsi); 4703 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 4704 4705 if (POINTER_TYPE_P (type)) 4706 t = fold_build_pointer_plus (fd->loop.v, step); 4707 else 4708 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4709 expand_omp_build_assign (&gsi, fd->loop.v, t); 4710 4711 if (fd->collapse > 1) 4712 { 4713 i = fd->collapse - 1; 4714 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 4715 { 4716 t = fold_convert (sizetype, fd->loops[i].step); 4717 t = fold_build_pointer_plus (fd->loops[i].v, t); 4718 } 4719 else 4720 { 4721 t = fold_convert (TREE_TYPE (fd->loops[i].v), 4722 fd->loops[i].step); 4723 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 4724 fd->loops[i].v, t); 4725 } 4726 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4727 4728 for (i = fd->collapse - 1; i > 0; i--) 4729 { 4730 tree itype = TREE_TYPE (fd->loops[i].v); 4731 tree itype2 = TREE_TYPE (fd->loops[i - 1].v); 4732 if (POINTER_TYPE_P (itype2)) 4733 itype2 = signed_type_for (itype2); 4734 t = fold_convert (itype2, fd->loops[i - 1].step); 4735 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4736 GSI_SAME_STMT); 4737 t = build3 (COND_EXPR, itype2, 4738 build2 (fd->loops[i].cond_code, boolean_type_node, 4739 fd->loops[i].v, 4740 fold_convert (itype, fd->loops[i].n2)), 4741 build_int_cst (itype2, 0), t); 4742 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) 4743 t = fold_build_pointer_plus (fd->loops[i - 1].v, t); 4744 else 4745 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); 4746 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); 4747 4748 t = fold_convert (itype, fd->loops[i].n1); 4749 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4750 GSI_SAME_STMT); 4751 t = build3 (COND_EXPR, itype, 4752 build2 (fd->loops[i].cond_code, boolean_type_node, 4753 fd->loops[i].v, 4754 fold_convert (itype, fd->loops[i].n2)), 4755 fd->loops[i].v, t); 4756 expand_omp_build_assign (&gsi, 
fd->loops[i].v, t); 4757 } 4758 } 4759 4760 /* Remove GIMPLE_OMP_CONTINUE. */ 4761 gsi_remove (&gsi, true); 4762 } 4763 4764 /* Emit the condition in L1_BB. */ 4765 gsi = gsi_start_bb (l1_bb); 4766 4767 t = fold_convert (type, n2); 4768 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4769 false, GSI_CONTINUE_LINKING); 4770 tree v = fd->loop.v; 4771 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 4772 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 4773 false, GSI_CONTINUE_LINKING); 4774 t = build2 (fd->loop.cond_code, boolean_type_node, v, t); 4775 cond_stmt = gimple_build_cond_empty (t); 4776 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 4777 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 4778 NULL, NULL) 4779 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 4780 NULL, NULL)) 4781 { 4782 gsi = gsi_for_stmt (cond_stmt); 4783 gimple_regimplify_operands (cond_stmt, &gsi); 4784 } 4785 4786 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ 4787 if (is_simt) 4788 { 4789 gsi = gsi_start_bb (l2_bb); 4790 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); 4791 if (POINTER_TYPE_P (type)) 4792 t = fold_build_pointer_plus (fd->loop.v, step); 4793 else 4794 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4795 expand_omp_build_assign (&gsi, fd->loop.v, t); 4796 } 4797 4798 /* Remove GIMPLE_OMP_RETURN. */ 4799 gsi = gsi_last_bb (exit_bb); 4800 gsi_remove (&gsi, true); 4801 4802 /* Connect the new blocks. 
*/ 4803 remove_edge (FALLTHRU_EDGE (entry_bb)); 4804 4805 if (!broken_loop) 4806 { 4807 remove_edge (BRANCH_EDGE (entry_bb)); 4808 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 4809 4810 e = BRANCH_EDGE (l1_bb); 4811 ne = FALLTHRU_EDGE (l1_bb); 4812 e->flags = EDGE_TRUE_VALUE; 4813 } 4814 else 4815 { 4816 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 4817 4818 ne = single_succ_edge (l1_bb); 4819 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 4820 4821 } 4822 ne->flags = EDGE_FALSE_VALUE; 4823 e->probability = REG_BR_PROB_BASE * 7 / 8; 4824 ne->probability = REG_BR_PROB_BASE / 8; 4825 4826 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 4827 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 4828 4829 if (simt_maxlane) 4830 { 4831 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, 4832 NULL_TREE, NULL_TREE); 4833 gsi = gsi_last_bb (entry_bb); 4834 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 4835 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); 4836 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; 4837 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; 4838 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; 4839 l2_dom_bb = entry_bb; 4840 } 4841 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 4842 4843 if (!broken_loop) 4844 { 4845 struct loop *loop = alloc_loop (); 4846 loop->header = l1_bb; 4847 loop->latch = cont_bb; 4848 add_loop (loop, l1_bb->loop_father); 4849 loop->safelen = safelen_int; 4850 if (simduid) 4851 { 4852 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); 4853 cfun->has_simduid_loops = true; 4854 } 4855 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize 4856 the loop. 
*/
      if ((flag_tree_loop_vectorize
	   || (!global_options_set.x_flag_tree_loop_vectorize
	       && !global_options_set.x_flag_tree_vectorize))
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  cfun->has_force_vectorize_loops = true;
	}
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */

static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  /* INNER_STMT must be the sandwiched GIMPLE_OMP_TASK carrying the
     taskloop flag; its _looptemp_ clauses receive the computed bounds.  */
  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  The runtime iterates in an
     unsigned long long space while the loop variable is signed; when a
     bound may be negative (or is not a known constant), adding
     TYPE_MIN_VALUE maps the signed range order-preservingly onto the
     unsigned one.  NOTE(review): presumably matches the GOMP_taskloop_ull
     contract — confirm against libgomp.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  /* ENTRY_BB ends in the outer GIMPLE_OMP_FOR, CONT_BB in the
     GIMPLE_OMP_CONTINUE and EXIT_BB in the GIMPLE_OMP_RETURN
     (see struct omp_region).  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      /* Compute the per-dimension iteration counts for the collapsed
	 loop nest.  */
      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  /* Merge the zero-iterations path back in front of the bound
	     computations and keep dominators consistent.  */
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  /* Compute the start (T0) and end (T1) of the iteration space in the
     runtime's iteration type.  */
  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  /* The first two _looptemp_ clauses on the inner GIMPLE_OMP_TASK are
     the destinations for the computed start and end values.  */
  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (innerc);
      /* Skip over the per-dimension _looptemp_ clauses.  */
      for (i = 1; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	}
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  /* Emit STARTVAR = T0 and ENDVAR = T1 ahead of the task.  */
  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  /* This outer construct only computed temporaries; drop its OMP
     markers and collapse the region into straight-line code.  */

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  gsi = gsi_last_bb (cont_bb);
  gsi_remove (&gsi, true);

  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS, region->entry));
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.
*/

static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  /* A "broken" loop has no GIMPLE_OMP_CONTINUE, e.g. when the body
     never reaches the loop latch.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  Mirrors the biasing done in
     expand_omp_taskloop_for_outer: the incoming sub-range was biased
     there, so it must be un-biased symmetrically here.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  /* Identify the region's blocks and sanity-check its shape.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  /* N1 and N2 are the per-task iteration sub-range, supplied through
     the first two _looptemp_ clauses of this GIMPLE_OMP_FOR (filled in
     by GOMP_taskloop{,_ull} — see the function comment above).  */
  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* For a composite construct, hand the sub-range on to the nested
	 construct through INNER_STMT's _looptemp_ clauses instead of
	 assigning the user's loop variable directly.  */
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      /* Also initialize the user's loop variable, converting if the
	 types differ.  */
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* Emit V += STEP and the back-edge test V COND E.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* The nested construct supplies the back edge.  */
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* Route the back edge through the collapse-update block.  */
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Register the natural loop; the latch is only known when the
	 nest is not collapsed (otherwise it is inside COLLAPSE_BB).  */
      struct loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.
The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop.  */

static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
  tree v = fd->loop.v;
  enum tree_code cond_code = fd->loop.cond_code;
  enum tree_code plus_code = PLUS_EXPR;

  tree chunk_size = integer_minus_one_node;
  tree gwv = integer_zero_node;
  tree iter_type = TREE_TYPE (v);
  /* DIFF_TYPE is the signed type used for ranges/offsets; PLUS_TYPE is
     the type of the right operand of PLUS_CODE (sizetype for pointers).  */
  tree diff_type = iter_type;
  tree plus_type = iter_type;
  struct oacc_collapse *counts = NULL;

  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
		       == GF_OMP_FOR_KIND_OACC_LOOP);
  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);

  if (POINTER_TYPE_P (iter_type))
    {
      plus_code = POINTER_PLUS_EXPR;
      plus_type = sizetype;
    }
  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
    diff_type = signed_type_for (diff_type);

  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
  basic_block bottom_bb = NULL;

  /* entry_bb has two successors; the branch edge is to the exit
     block, fallthrough edge to body.  */
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);

  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
     body_bb, or to a block whose only successor is the body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of the entry_bb).  */
  if (cont_bb)
    {
      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
      basic_block bed = BRANCH_EDGE (cont_bb)->dest;

      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
    }
  else
    gcc_assert (!gimple_in_ssa_p (cfun));

  /* The exit block only has entry_bb and cont_bb as predecessors.  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));

  tree chunk_no;
  tree chunk_max = NULL_TREE;
  tree bound, offset;
  tree step = create_tmp_var (diff_type, ".step");
  bool up = cond_code == LT_EXPR;
  tree dir = build_int_cst (diff_type, up ? +1 : -1);
  /* Chunking is only done outside SSA form (i.e. not for the SSA
     parallelizer's gang-parallel loops).  */
  bool chunking = !gimple_in_ssa_p (cfun);
  bool negating;

  /* Tiling vars.  */
  tree tile_size = NULL_TREE;
  tree element_s = NULL_TREE;
  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
  basic_block elem_body_bb = NULL;
  basic_block elem_cont_bb = NULL;

  /* SSA instances.  */
  tree offset_incr = NULL_TREE;
  tree offset_init = NULL_TREE;

  gimple_stmt_iterator gsi;
  gassign *ass;
  gcall *call;
  gimple *stmt;
  tree expr;
  location_t loc;
  edge split, be, fte;

  /* Split the end of entry_bb to create head_bb.  */
  split = split_block (entry_bb, last_stmt (entry_bb));
  basic_block head_bb = split->dest;
  entry_bb = split->src;

  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
  gsi = gsi_last_bb (entry_bb);
  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
  loc = gimple_location (for_stmt);

  if (gimple_in_ssa_p (cfun))
    {
      offset_init = gimple_omp_for_index (for_stmt, 0);
      gcc_assert (integer_zerop (fd->loop.n1));
      /* The SSA parallelizer does gang parallelism.  */
      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
    }

  if (fd->collapse > 1 || fd->tiling)
    {
      /* Collapse/tiling flattens the nest; compute per-dimension info
	 and the total iteration count.  */
      gcc_assert (!gimple_in_ssa_p (cfun) && up);
      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
					      TREE_TYPE (fd->loop.n2), loc);

      if (SSA_VAR_P (fd->loop.n2))
	{
	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
					    true, GSI_SAME_STMT);
	  ass = gimple_build_assign (fd->loop.n2, total);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	}
    }

  tree b = fd->loop.n1;
  tree e = fd->loop.n2;
  tree s = fd->loop.step;

  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Convert the step, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
  if (negating)
    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
  s = fold_convert (diff_type, s);
  if (negating)
    s = fold_build1 (NEGATE_EXPR, diff_type, s);
  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);

  if (!chunking)
    chunk_size = integer_zero_node;
  expr = fold_convert (diff_type, chunk_size);
  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  if (fd->tiling)
    {
      /* Determine the tile size and element step,
	 modify the outer loop step size.  */
      tile_size = create_tmp_var (diff_type, ".tile_size");
      expr = build_int_cst (diff_type, 1);
      for (int ix = 0; ix < fd->collapse; ix++)
	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
      expr = force_gimple_operand_gsi (&gsi, expr, true,
				       NULL_TREE, true, GSI_SAME_STMT);
      ass = gimple_build_assign (tile_size, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      element_s = create_tmp_var (diff_type, ".element_s");
      ass = gimple_build_assign (element_s, s);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      /* The outer loop now advances one whole tile per iteration.  */
      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
      s = force_gimple_operand_gsi (&gsi, expr, true,
				    NULL_TREE, true, GSI_SAME_STMT);
    }

  /* Determine the range, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (iter_type);
  expr = fold_build2 (MINUS_EXPR, plus_type,
		      fold_convert (plus_type, negating ? b : e),
		      fold_convert (plus_type, negating ? e : b));
  expr = fold_convert (diff_type, expr);
  if (negating)
    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
  tree range = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  chunk_no = build_int_cst (diff_type, 0);
  if (chunking)
    {
      gcc_assert (!gimple_in_ssa_p (cfun));

      expr = chunk_no;
      chunk_max = create_tmp_var (diff_type, ".chunk_max");
      chunk_no = create_tmp_var (diff_type, ".chunk_no");

      ass = gimple_build_assign (chunk_no, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
					 build_int_cst (integer_type_node,
							IFN_GOACC_LOOP_CHUNKS),
					 dir, range, s, chunk_size, gwv);
      gimple_call_set_lhs (call, chunk_max);
      gimple_set_location (call, loc);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
    }
  else
    chunk_size = chunk_no;

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_STEP),
				     dir, range, s, chunk_size, gwv);
  gimple_call_set_lhs (call, step);
  gimple_set_location (call, loc);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  /* Fixup edges from head_bb.  The XOR turns the former fallthru edge
     into the true edge of the new loop-entry condition.  */
  be = BRANCH_EDGE (head_bb);
  fte = FALLTHRU_EDGE (head_bb);
  be->flags |= EDGE_FALSE_VALUE;
  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

  basic_block body_bb = fte->dest;

  if (gimple_in_ssa_p (cfun))
    {
      /* In SSA form, the OMP_CONTINUE provides the use/def pair of the
	 offset variable.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));

      offset = gimple_omp_continue_control_use (cont_stmt);
      offset_incr = gimple_omp_continue_control_def (cont_stmt);
    }
  else
    {
      offset = create_tmp_var (diff_type, ".offset");
      offset_init = offset_incr = offset;
    }
  bound = create_tmp_var (TREE_TYPE (offset), ".bound");

  /* Loop offset & bound go into head_bb.  */
  gsi = gsi_start_bb (head_bb);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_OFFSET),
				     dir, range, s,
				     chunk_size, gwv, chunk_no);
  gimple_call_set_lhs (call, offset_init);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_BOUND),
				     dir, range, s,
				     chunk_size, gwv, offset_init);
  gimple_call_set_lhs (call, bound);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
		    GSI_CONTINUE_LINKING);

  /* V assignment goes into body_bb.  */
  if (!gimple_in_ssa_p (cfun))
    {
      gsi = gsi_start_bb (body_bb);

      expr = build2 (plus_code, iter_type, b,
		     fold_convert (plus_type, offset));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      if (fd->collapse > 1 || fd->tiling)
	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);

      if (fd->tiling)
	{
	  /* Determine the range of the element loop -- usually simply
	     the tile_size, but could be smaller if the final
	     iteration of the outer loop is a partial tile.  */
	  tree e_range = create_tmp_var (diff_type, ".e_range");

	  expr = build2 (MIN_EXPR, diff_type,
			 build2 (MINUS_EXPR, diff_type, bound, offset),
			 build2 (MULT_EXPR, diff_type, tile_size,
				 element_s));
	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
					   true, GSI_SAME_STMT);
	  ass = gimple_build_assign (e_range, expr);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

	  /* Determine bound, offset & step of inner loop.  */
	  e_bound = create_tmp_var (diff_type, ".e_bound");
	  e_offset = create_tmp_var (diff_type, ".e_offset");
	  e_step = create_tmp_var (diff_type, ".e_step");

	  /* Mark these as element loops.  */
	  tree t, e_gwv = integer_minus_one_node;
	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
					     element_s, chunk, e_gwv, chunk);
	  gimple_call_set_lhs (call, e_offset);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
					     element_s, chunk, e_gwv, e_offset);
	  gimple_call_set_lhs (call, e_bound);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
					     element_s, chunk, e_gwv);
	  gimple_call_set_lhs (call, e_step);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  /* Add test and split block.  */
	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
	  stmt = gimple_build_cond_empty (expr);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (body_bb, stmt);
	  elem_body_bb = split->dest;
	  if (cont_bb == body_bb)
	    cont_bb = elem_body_bb;
	  body_bb = split->src;

	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
	  if (cont_bb == NULL)
	    {
	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
	      e->probability = PROB_EVEN;
	      split->probability = PROB_EVEN;
	    }

	  /* Initialize the user's loop vars.  */
	  gsi = gsi_start_bb (elem_body_bb);
	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
	}
    }

  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it was, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
  if (cont_bb)
    {
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      loc = gimple_location (cont_stmt);

      if (fd->tiling)
	{
	  /* Insert element loop increment and test.  */
	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
					   true, GSI_SAME_STMT);
	  ass = gimple_build_assign (e_offset, expr);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);

	  stmt = gimple_build_cond_empty (expr);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (cont_bb, stmt);
	  elem_cont_bb = split->src;
	  cont_bb = split->dest;

	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);

	  make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);

	  gsi = gsi_for_stmt (cont_stmt);
	}

      /* Increment offset.  */
      if (gimple_in_ssa_p (cfun))
	expr = build2 (plus_code, iter_type, offset,
		       fold_convert (plus_type, step));
      else
	expr = build2 (PLUS_EXPR, diff_type, offset, step);
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (offset_incr, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);

      /* Remove the GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      /* Fixup edges from cont_bb.  */
      be = BRANCH_EDGE (cont_bb);
      fte = FALLTHRU_EDGE (cont_bb);
      be->flags |= EDGE_TRUE_VALUE;
      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;

      if (chunking)
	{
	  /* Split the beginning of exit_bb to make bottom_bb.  We
	     need to insert a nop at the start, because splitting is
	     after a stmt, not before.  */
	  gsi = gsi_start_bb (exit_bb);
	  stmt = gimple_build_nop ();
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (exit_bb, stmt);
	  bottom_bb = split->src;
	  exit_bb = split->dest;
	  gsi = gsi_last_bb (bottom_bb);

	  /* Chunk increment and test goes into bottom_bb.  */
	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
			 build_int_cst (diff_type, 1));
	  ass = gimple_build_assign (chunk_no, expr);
	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);

	  /* Chunk test at end of bottom_bb.  */
	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
			    GSI_CONTINUE_LINKING);

	  /* Fixup edges from bottom_bb.  */
	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
	}
    }

  gsi = gsi_last_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  loc = gimple_location (gsi_stmt (gsi));

  if (!gimple_in_ssa_p (cfun))
    {
      /* Insert the final value of V, in case it is live.  This is the
	 value for the only thread that survives past the join.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
    }

  /* Remove the OMP_RETURN.  */
  gsi_remove (&gsi, true);

  if (cont_bb)
    {
      /* We now have one, two or three nested loops.  Update the loop
	 structures.  */
      struct loop *parent = entry_bb->loop_father;
      struct loop *body = body_bb->loop_father;

      if (chunking)
	{
	  struct loop *chunk_loop = alloc_loop ();
	  chunk_loop->header = head_bb;
	  chunk_loop->latch = bottom_bb;
	  add_loop (chunk_loop, parent);
	  parent = chunk_loop;
	}
      else if (parent != body)
	{
	  gcc_assert (body->header == body_bb);
	  gcc_assert (body->latch == cont_bb
		      || single_pred (body->latch) == cont_bb);
	  parent = NULL;
	}

      if (parent)
	{
	  struct loop *body_loop = alloc_loop ();
	  body_loop->header = body_bb;
	  body_loop->latch = cont_bb;
	  add_loop (body_loop, parent);

	  if (fd->tiling)
	    {
	      /* Insert tiling's element loop.  */
	      struct loop *inner_loop = alloc_loop ();
	      inner_loop->header = elem_body_bb;
	      inner_loop->latch = elem_cont_bb;
	      add_loop (inner_loop, body_loop);
	    }
	}
    }
}

/* Expand the OMP loop defined by REGION.
   */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  /* Extract the description of the (possibly collapsed) loop nest from
     the GIMPLE_OMP_FOR statement that ends the region's entry block.  */
  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  /* Lowering marked these edges abnormal to keep the region intact;
     clear that now so normal CFG optimizations can proceed.  */
  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  /* Dispatch on the kind of loop construct; the static-schedule,
     non-ordered case gets specialized inline expansion, everything
     else goes through the libgomp runtime interface below.  */
  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    expand_cilk_for (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Compute the index of the GOMP_loop_*_start/next builtin pair
	 to call; FN_INDEX encodes schedule kind, nonmonotonic modifier
	 and the presence of ordered clauses.  */
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  fn_index = 3;
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
	      && !fd.ordered
	      && !fd.have_ordered)
	    {
	      /* Nonmonotonic dynamic/guided use a separate entry point.  */
	      fn_index = 3 + fd.sched_kind;
	      break;
	    }
	  /* FALLTHRU */
	default:
	  fn_index = fd.sched_kind;
	  break;
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 6;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  /* Shift over to the unsigned-long-long variants of the
	     builtins when the iteration type requires them.  */
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

    If this is a combined parallel sections, replace the call to
    GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  Find the "exit" block,
	 i.e. the successor of L0_BB that does not start with a
	 GIMPLE_OMP_SECTION; its label becomes the '0' case target.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  /* Case '0' jumps to L2: no more work.  */
  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      /* Remove the per-section entry/exit markers and make the edges
	 plain fallthrus.  */
      si = gsi_last_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.
   */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* Remove the GIMPLE_OMP_SINGLE marker ending the entry block.  */
  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* Replace the GIMPLE_OMP_RETURN with a barrier call, unless the
     single has a nowait clause.  */
  si = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* Drop the entry marker; any of these directive codes may appear
     here since all of them expand to nothing but their body.  */
  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* The exit marker may be absent (e.g. for a standalone ordered
     with a depend clause).  */
  if (exit_bb)
    {
      si = gsi_last_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.
   */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* INDEX is log2 of the access size; +1 because entry 0 of the builtin
     group is the generic (size-parameterized) variant.  */
  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  /* Build loaded_val = __atomic_load_N (addr, memmodel), honoring a
     seq_cst clause on the directive.  */
  call = build_call_expr_loc (loc, decl, 2, addr,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Drop the now-redundant GIMPLE_OMP_ATOMIC_STORE marker.  */
  store_bb = single_succ (load_bb);
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.
   */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (exchange)
    {
      /* For an exchange, assign the old value to LOADED_VAL.  */
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  /* NEED_NEW: caller wants the post-op value; NEED_OLD: the pre-op value.
     At most one of them can be set.  */
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  The loaded value
     may appear as either operand for commutative codes.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL,
					     seq_cst ? MEMMODEL_SEQ_CST
						     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the marker and the now-folded assignment from STORE_BB.  */
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.
   Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	// with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Prefer an atomic load builtin for the initial read when the target
     provides one; otherwise fall back to a plain memory reference.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
		      build_int_cst (TREE_TYPE (iaddr), 0));

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  In SSA form the loop-carried
     value becomes a PHI in the loop header; otherwise a plain copy.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      /* View-convert the integral working value back to the user-visible
	 (e.g. floating-point) LOADED_VAL at the top of the loop.  */
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      /* Outside SSA we need an explicit copy of the previous value to
	 compare against, and must copy the CAS result into LOADEDI for
	 the next iteration before the branch.  */
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  /* Register the newly created retry loop with the loop optimizer.  */
  struct loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.
Implement the atomic operation as: 6651 6652 GOMP_atomic_start (); 6653 *addr = rhs; 6654 GOMP_atomic_end (); 6655 6656 The result is not globally atomic, but works so long as all parallel 6657 references are within #pragma omp atomic directives. According to 6658 responses received from omp@openmp.org, appears to be within spec. 6659 Which makes sense, since that's how several other compilers handle 6660 this situation as well. 6661 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're 6662 expanding. STORED_VAL is the operand of the matching 6663 GIMPLE_OMP_ATOMIC_STORE. 6664 6665 We replace 6666 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with 6667 loaded_val = *addr; 6668 6669 and replace 6670 GIMPLE_OMP_ATOMIC_STORE (stored_val) with 6671 *addr = stored_val; 6672*/ 6673 6674static bool 6675expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, 6676 tree addr, tree loaded_val, tree stored_val) 6677{ 6678 gimple_stmt_iterator si; 6679 gassign *stmt; 6680 tree t; 6681 6682 si = gsi_last_bb (load_bb); 6683 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); 6684 6685 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START); 6686 t = build_call_expr (t, 0); 6687 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); 6688 6689 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr)); 6690 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6691 gsi_remove (&si, true); 6692 6693 si = gsi_last_bb (store_bb); 6694 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); 6695 6696 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)), 6697 stored_val); 6698 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 6699 6700 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END); 6701 t = build_call_expr (t, 0); 6702 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); 6703 gsi_remove (&si, true); 6704 6705 if (gimple_in_ssa_p (cfun)) 6706 update_ssa 
(TODO_update_ssa_no_phi); 6707 return true; 6708} 6709 6710/* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand 6711 using expand_omp_atomic_fetch_op. If it failed, we try to 6712 call expand_omp_atomic_pipeline, and if it fails too, the 6713 ultimate fallback is wrapping the operation in a mutex 6714 (expand_omp_atomic_mutex). REGION is the atomic region built 6715 by build_omp_regions_1(). */ 6716 6717static void 6718expand_omp_atomic (struct omp_region *region) 6719{ 6720 basic_block load_bb = region->entry, store_bb = region->exit; 6721 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb)); 6722 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb)); 6723 tree loaded_val = gimple_omp_atomic_load_lhs (load); 6724 tree addr = gimple_omp_atomic_load_rhs (load); 6725 tree stored_val = gimple_omp_atomic_store_val (store); 6726 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); 6727 HOST_WIDE_INT index; 6728 6729 /* Make sure the type is one of the supported sizes. */ 6730 index = tree_to_uhwi (TYPE_SIZE_UNIT (type)); 6731 index = exact_log2 (index); 6732 if (index >= 0 && index <= 4) 6733 { 6734 unsigned int align = TYPE_ALIGN_UNIT (type); 6735 6736 /* __sync builtins require strict data alignment. */ 6737 if (exact_log2 (align) >= index) 6738 { 6739 /* Atomic load. */ 6740 if (loaded_val == stored_val 6741 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT 6742 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) 6743 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD 6744 && expand_omp_atomic_load (load_bb, addr, loaded_val, index)) 6745 return; 6746 6747 /* Atomic store. 
*/ 6748 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT 6749 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) 6750 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD 6751 && store_bb == single_succ (load_bb) 6752 && first_stmt (store_bb) == store 6753 && expand_omp_atomic_store (load_bb, addr, loaded_val, 6754 stored_val, index)) 6755 return; 6756 6757 /* When possible, use specialized atomic update functions. */ 6758 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) 6759 && store_bb == single_succ (load_bb) 6760 && expand_omp_atomic_fetch_op (load_bb, addr, 6761 loaded_val, stored_val, index)) 6762 return; 6763 6764 /* If we don't have specialized __sync builtins, try and implement 6765 as a compare and swap loop. */ 6766 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr, 6767 loaded_val, stored_val, index)) 6768 return; 6769 } 6770 } 6771 6772 /* The ultimate fallback is wrapping the operation in a mutex. */ 6773 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val); 6774} 6775 6776/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending 6777 at REGION_EXIT. */ 6778 6779static void 6780mark_loops_in_oacc_kernels_region (basic_block region_entry, 6781 basic_block region_exit) 6782{ 6783 struct loop *outer = region_entry->loop_father; 6784 gcc_assert (region_exit == NULL || outer == region_exit->loop_father); 6785 6786 /* Don't parallelize the kernels region if it contains more than one outer 6787 loop. 
     */
  unsigned int nr_outer_loops = 0;
  struct loop *single_outer = NULL;
  /* Count the immediate children of OUTER that lie inside the region, i.e.
     are dominated by the entry but not by the exit.  */
  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  /* Bail out if the nest below SINGLE_OUTER is not a simple chain, i.e.
     some level has a sibling loop.  */
  for (struct loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}

/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

/* GC-rooted cache of the types above, built lazily by
   grid_create_kernel_launch_attr_types.  */
static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.
*/ 6832 6833static void 6834grid_create_kernel_launch_attr_types (void) 6835{ 6836 if (grid_attr_trees) 6837 return; 6838 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); 6839 6840 tree dim_arr_index_type 6841 = build_index_type (build_int_cst (integer_type_node, 2)); 6842 grid_attr_trees->kernel_dim_array_type 6843 = build_array_type (uint32_type_node, dim_arr_index_type); 6844 6845 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); 6846 grid_attr_trees->kernel_lattrs_dimnum_decl 6847 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), 6848 uint32_type_node); 6849 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; 6850 6851 grid_attr_trees->kernel_lattrs_grid_decl 6852 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), 6853 grid_attr_trees->kernel_dim_array_type); 6854 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) 6855 = grid_attr_trees->kernel_lattrs_dimnum_decl; 6856 grid_attr_trees->kernel_lattrs_group_decl 6857 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), 6858 grid_attr_trees->kernel_dim_array_type); 6859 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) 6860 = grid_attr_trees->kernel_lattrs_grid_decl; 6861 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, 6862 "__gomp_kernel_launch_attributes", 6863 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); 6864} 6865 6866/* Insert before the current statement in GSI a store of VALUE to INDEX of 6867 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be 6868 of type uint32_type_node. 
*/ 6869 6870static void 6871grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, 6872 tree fld_decl, int index, tree value) 6873{ 6874 tree ref = build4 (ARRAY_REF, uint32_type_node, 6875 build3 (COMPONENT_REF, 6876 grid_attr_trees->kernel_dim_array_type, 6877 range_var, fld_decl, NULL_TREE), 6878 build_int_cst (integer_type_node, index), 6879 NULL_TREE, NULL_TREE); 6880 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); 6881} 6882 6883/* Return a tree representation of a pointer to a structure with grid and 6884 work-group size information. Statements filling that information will be 6885 inserted before GSI, TGT_STMT is the target statement which has the 6886 necessary information in it. */ 6887 6888static tree 6889grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, 6890 gomp_target *tgt_stmt) 6891{ 6892 grid_create_kernel_launch_attr_types (); 6893 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, 6894 "__kernel_launch_attrs"); 6895 6896 unsigned max_dim = 0; 6897 for (tree clause = gimple_omp_target_clauses (tgt_stmt); 6898 clause; 6899 clause = OMP_CLAUSE_CHAIN (clause)) 6900 { 6901 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) 6902 continue; 6903 6904 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); 6905 max_dim = MAX (dim, max_dim); 6906 6907 grid_insert_store_range_dim (gsi, lattrs, 6908 grid_attr_trees->kernel_lattrs_grid_decl, 6909 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); 6910 grid_insert_store_range_dim (gsi, lattrs, 6911 grid_attr_trees->kernel_lattrs_group_decl, 6912 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); 6913 } 6914 6915 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, 6916 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); 6917 gcc_checking_assert (max_dim <= 2); 6918 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); 6919 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), 6920 GSI_SAME_STMT); 6921 
TREE_ADDRESSABLE (lattrs) = 1; 6922 return build_fold_addr_expr (lattrs); 6923} 6924 6925/* Build target argument identifier from the DEVICE identifier, value 6926 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ 6927 6928static tree 6929get_target_argument_identifier_1 (int device, bool subseqent_param, int id) 6930{ 6931 tree t = build_int_cst (integer_type_node, device); 6932 if (subseqent_param) 6933 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 6934 build_int_cst (integer_type_node, 6935 GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); 6936 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 6937 build_int_cst (integer_type_node, id)); 6938 return t; 6939} 6940 6941/* Like above but return it in type that can be directly stored as an element 6942 of the argument array. */ 6943 6944static tree 6945get_target_argument_identifier (int device, bool subseqent_param, int id) 6946{ 6947 tree t = get_target_argument_identifier_1 (device, subseqent_param, id); 6948 return fold_convert (ptr_type_node, t); 6949} 6950 6951/* Return a target argument consisting of DEVICE identifier, value identifier 6952 ID, and the actual VALUE. */ 6953 6954static tree 6955get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, 6956 tree value) 6957{ 6958 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, 6959 fold_convert (integer_type_node, value), 6960 build_int_cst (unsigned_type_node, 6961 GOMP_TARGET_ARG_VALUE_SHIFT)); 6962 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 6963 get_target_argument_identifier_1 (device, false, id)); 6964 t = fold_convert (ptr_type_node, t); 6965 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); 6966} 6967 6968/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, 6969 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, 6970 otherwise push an identifier (with DEVICE and ID) and the VALUE in two 6971 arguments. 
*/ 6972 6973static void 6974push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, 6975 int id, tree value, vec <tree> *args) 6976{ 6977 if (tree_fits_shwi_p (value) 6978 && tree_to_shwi (value) > -(1 << 15) 6979 && tree_to_shwi (value) < (1 << 15)) 6980 args->quick_push (get_target_argument_value (gsi, device, id, value)); 6981 else 6982 { 6983 args->quick_push (get_target_argument_identifier (device, true, id)); 6984 value = fold_convert (ptr_type_node, value); 6985 value = force_gimple_operand_gsi (gsi, value, true, NULL, true, 6986 GSI_SAME_STMT); 6987 args->quick_push (value); 6988 } 6989} 6990 6991/* Create an array of arguments that is then passed to GOMP_target. */ 6992 6993static tree 6994get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) 6995{ 6996 auto_vec <tree, 6> args; 6997 tree clauses = gimple_omp_target_clauses (tgt_stmt); 6998 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); 6999 if (c) 7000 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); 7001 else 7002 t = integer_minus_one_node; 7003 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 7004 GOMP_TARGET_ARG_NUM_TEAMS, t, &args); 7005 7006 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); 7007 if (c) 7008 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); 7009 else 7010 t = integer_minus_one_node; 7011 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 7012 GOMP_TARGET_ARG_THREAD_LIMIT, t, 7013 &args); 7014 7015 /* Add HSA-specific grid sizes, if available. */ 7016 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), 7017 OMP_CLAUSE__GRIDDIM_)) 7018 { 7019 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES; 7020 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id); 7021 args.quick_push (t); 7022 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); 7023 } 7024 7025 /* Produce more, perhaps device specific, arguments here. 
*/ 7026 7027 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, 7028 args.length () + 1), 7029 ".omp_target_args"); 7030 for (unsigned i = 0; i < args.length (); i++) 7031 { 7032 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 7033 build_int_cst (integer_type_node, i), 7034 NULL_TREE, NULL_TREE); 7035 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), 7036 GSI_SAME_STMT); 7037 } 7038 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 7039 build_int_cst (integer_type_node, args.length ()), 7040 NULL_TREE, NULL_TREE); 7041 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), 7042 GSI_SAME_STMT); 7043 TREE_ADDRESSABLE (argarray) = 1; 7044 return build_fold_addr_expr (argarray); 7045} 7046 7047/* Expand the GIMPLE_OMP_TARGET starting at REGION. */ 7048 7049static void 7050expand_omp_target (struct omp_region *region) 7051{ 7052 basic_block entry_bb, exit_bb, new_bb; 7053 struct function *child_cfun; 7054 tree child_fn, block, t; 7055 gimple_stmt_iterator gsi; 7056 gomp_target *entry_stmt; 7057 gimple *stmt; 7058 edge e; 7059 bool offloaded, data_region; 7060 7061 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); 7062 new_bb = region->entry; 7063 7064 offloaded = is_gimple_omp_offloaded (entry_stmt); 7065 switch (gimple_omp_target_kind (entry_stmt)) 7066 { 7067 case GF_OMP_TARGET_KIND_REGION: 7068 case GF_OMP_TARGET_KIND_UPDATE: 7069 case GF_OMP_TARGET_KIND_ENTER_DATA: 7070 case GF_OMP_TARGET_KIND_EXIT_DATA: 7071 case GF_OMP_TARGET_KIND_OACC_PARALLEL: 7072 case GF_OMP_TARGET_KIND_OACC_KERNELS: 7073 case GF_OMP_TARGET_KIND_OACC_UPDATE: 7074 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: 7075 case GF_OMP_TARGET_KIND_OACC_DECLARE: 7076 data_region = false; 7077 break; 7078 case GF_OMP_TARGET_KIND_DATA: 7079 case GF_OMP_TARGET_KIND_OACC_DATA: 7080 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: 7081 data_region = true; 7082 break; 7083 default: 7084 gcc_unreachable (); 7085 } 7086 7087 child_fn = NULL_TREE; 7088 
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.
	 */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  /* Scan forward for the .OMP_DATA_I = &.OMP_DATA_O copy; the
	     assert inside the loop guarantees it is found before the block
	     runs out of statements.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  /* NOTE(review): this inner ARG intentionally shadows the
		     outer declaration above; the outer one is only assigned
		     after the loop.  */
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.
	 */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Make sure to generate early debug for the function before
	 outlining anything.  */
      if (! gimple_in_ssa_p (cfun))
	(*debug_hooks->early_global_decl) (cfun->decl);

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.
	 Compacts the vector in place, keeping only decls whose context is
	 no longer the parent function.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.
	 */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	vec_safe_push (offload_funcs, child_fn);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.
     */
  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
  enum built_in_function start_ix;
  location_t clause_loc;
  unsigned int flags_i = 0;
  bool oacc_kernels_p = false;

  /* Select the libgomp/libgoacc entry point for this construct.  */
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      oacc_kernels_p = true;
      /* FALLTHROUGH */
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
     library choose) and there is no conditional.
     */
  cond = NULL_TREE;
  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
  if (c)
    {
      /* Even if we pass it to all library function calls, it is currently only
	 defined/used for the OpenMP target ones.  */
      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);

      device = OMP_CLAUSE_DEVICE_ID (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
  if (c)
    flags_i |= GOMP_TARGET_FLAG_NOWAIT;

  /* Ensure 'device' is of the correct type.  */
  device = fold_convert_loc (clause_loc, integer_type_node, device);

  /* If we found the clause 'if (cond)', build
     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).
     */
  if (cond)
    {
      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      /* Build a diamond: COND_BB branches to THEN_BB (device as given) or
	 ELSE_BB (host fallback), both falling through to NEW_BB.  */
      tmp_var = create_tmp_var (TREE_TYPE (device));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, device);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var,
				  build_int_cst (integer_type_node,
						 GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      device = tmp_var;
      gsi = gsi_last_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_bb (new_bb);
      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					 true, GSI_SAME_STMT);
    }

  /* T1..T4 describe the data mapping: map count, addresses, sizes and
     kinds — all zero/NULL when there is no data to map.  */
  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1
	= size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  /* Append the entry-point specific trailing arguments.  */
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      {
	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
	tagging = true;
      }
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.
	     */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (t_async);

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
						integer_type_node,
						OMP_CLAUSE_WAIT_EXPR (c)));
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.
       */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  /* Emit the launch call and drop the now-expanded OMP markers.  */
  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}

/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */

static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  /* For each collapsed dimension compute the iteration variable from the
     HSA work-item/work-group id instead of an actual loop.  */
  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true,
				     GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      /* Pick the builtin giving the thread's position in this dimension:
	 workgroup id for the group-level loop, work-item id within the
	 group for INTRA_GROUP, absolute work-item id otherwise.  */
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      /* startvar = n1 + threadid * step;  */
      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
  /* Remove the omp for statement.  */
  gsi = gsi_last_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.
     */
  gsi = gsi_last_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}

/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument_decls.  */

struct grid_arg_decl_map
{
  tree old_arg;	/* PARM_DECL of the original child function.  */
  tree new_arg;	/* Replacement PARM_DECL of the kernel clone.  */
};

/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
   pertaining to kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  /* No need to descend into types or other decls; they cannot contain the
     PARM_DECL being replaced.  */
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* If TARGET region contains a kernel body for loop, remove its region from the
   TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  /* Look for a GRID_BODY region directly nested in TARGET.  */
  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.
	 */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      /* No gridified body: register the plain child function as the
	 kernel.  */
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  /* Unlink the GPUKERNEL region from TARGET's children and find the
     gridified KFOR loop inside it.  */
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  /* KFOR iterates over groups; expand (and unlink) each inner
	     grid loop iterating over work-items within a group.  */
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  /* Clone the child function decl into a separate ".kernel" function with
     its own scope block, arguments and result.  */
  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* Make sure to generate early debug for the function before
     outlining anything.  */
  if (! gimple_in_ssa_p (cfun))
    (*debug_hooks->early_global_decl) (cfun->decl);

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.
     */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      /* Skip the .OMP_DATA_I = &.OMP_DATA_O sender assignment.  */
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();

  return;
}

/* Expand the parallel region tree rooted at REGION.
Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  /* Iterate over REGION and its peers; children are handled by the
     recursive call below.  */
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      /* For a combined worksharing loop, capture the inner region's
	 statement now, before the recursion below expands it.  */
      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      /* Point diagnostics at the directive being expanded, restoring
	 input_location afterwards.  */
      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  /* If expansion dumped any child function, re-emit the current
     function's dump header so subsequent output is attributed to it.  */
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).
*/

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  /* Only the last statement of a block can be an OMP directive that
     opens or closes a region.  */
  gsi = gsi_last_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  /* Record the continue block but stay in the same region.  */
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  /* In single-tree mode, stop once the tree rooted at the initial BB has
     been closed (PARENT popped back to NULL).  */
  if (single_tree && !parent)
    return;

  /* Recurse into dominated blocks with the possibly updated PARENT.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.
*/

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  /* Barriers must be removed before expansion, then the whole tree is
     expanded and freed.  */
  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions, storing it in
   root_omp_region (via build_omp_regions_1 / new_omp_region).  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  /* Dominance information must be up to date for the dominator walk.  */
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  /* Nothing to do if the function contains no OMP directives.  */
  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.
*/

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      /* Only do real work when some OMP-ish front-end flag is on and no
	 errors have been reported.  */
      bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      /* Run only if OMP expansion has not already happened for FUN.  */
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  BB's last statement is inspected; *REGION tracks the
   innermost open OMP region and *REGION_IDX its entry block's index.
   Returns true if the caller should create a fallthru edge to the next
   block.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      /* These directives open a new region.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* An ordered directive with a depend clause is stand-alone; close
	 its region immediately.  */
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  /* Stand-alone target directives; close the region again.  */
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      /* The switch's outgoing edges are wired up when the matching
	 GIMPLE_OMP_CONTINUE is reached below.  */
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Publish the updated region (and its entry block index) back to the
     caller when it changed.  */
  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"