1/* Transformation Utilities for Loop Vectorization. 2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc. 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 2002110-1301, USA. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "tm.h" 26#include "ggc.h" 27#include "tree.h" 28#include "target.h" 29#include "rtl.h" 30#include "basic-block.h" 31#include "diagnostic.h" 32#include "tree-flow.h" 33#include "tree-dump.h" 34#include "timevar.h" 35#include "cfgloop.h" 36#include "expr.h" 37#include "optabs.h" 38#include "recog.h" 39#include "tree-data-ref.h" 40#include "tree-chrec.h" 41#include "tree-scalar-evolution.h" 42#include "tree-vectorizer.h" 43#include "langhooks.h" 44#include "tree-pass.h" 45#include "toplev.h" 46#include "real.h" 47 48/* Utility functions for the code transformation. 
*/ 49static bool vect_transform_stmt (tree, block_stmt_iterator *); 50static void vect_align_data_ref (tree); 51static tree vect_create_destination_var (tree, tree); 52static tree vect_create_data_ref_ptr 53 (tree, block_stmt_iterator *, tree, tree *, bool); 54static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); 55static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); 56static tree vect_get_vec_def_for_operand (tree, tree, tree *); 57static tree vect_init_vector (tree, tree); 58static void vect_finish_stmt_generation 59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); 60static bool vect_is_simple_cond (tree, loop_vec_info); 61static void update_vuses_to_preheader (tree, struct loop*); 62static tree get_initial_def_for_reduction (tree, tree, tree *); 63 64/* Utility function dealing with loop peeling (not peeling itself). */ 65static void vect_generate_tmps_on_preheader 66 (loop_vec_info, tree *, tree *, tree *); 67static tree vect_build_loop_niters (loop_vec_info); 68static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); 69static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); 70static void vect_update_init_of_dr (struct data_reference *, tree niters); 71static void vect_update_inits_of_drs (loop_vec_info, tree); 72static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); 73static void vect_do_peeling_for_loop_bound 74 (loop_vec_info, tree *, struct loops *); 75static int vect_min_worthwhile_factor (enum tree_code); 76 77 78/* Function vect_get_new_vect_var. 79 80 Returns a name for a new variable. The current naming scheme appends the 81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to 82 the name of vectorizer generated variables, and appends that to NAME if 83 provided. 
*/

/* TYPE is the type of the new temporary.  VAR_KIND selects the prefix:
   "vect_" for vect_simple_var, "stmp_" for vect_scalar_var and "vect_p" for
   vect_pointer_var; any other kind is treated as unreachable.  When NAME is
   non-NULL the temporary is named PREFIX immediately followed by NAME,
   otherwise PREFIX alone is used.  Returns the new temporary.  */

static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    {
      /* concat hands back freshly allocated storage.  create_tmp_var only
         reads the string to derive the variable's name, so release the
         buffer afterwards instead of leaking it on every call.  */
      char *full_name = concat (prefix, name, NULL);

      new_vect_var = create_tmp_var (type, full_name);
      free (full_name);
    }
  else
    new_vect_var = create_tmp_var (type, prefix);

  return new_vect_var;
}


/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is multiplied by the step of the data
      reference (DR_STEP) and added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.
*/ 132 133static tree 134vect_create_addr_base_for_vector_ref (tree stmt, 135 tree *new_stmt_list, 136 tree offset) 137{ 138 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 139 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 140 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr)); 141 tree base_name = build_fold_indirect_ref (data_ref_base); 142 tree ref = DR_REF (dr); 143 tree scalar_type = TREE_TYPE (ref); 144 tree scalar_ptr_type = build_pointer_type (scalar_type); 145 tree vec_stmt; 146 tree new_temp; 147 tree addr_base, addr_expr; 148 tree dest, new_stmt; 149 tree base_offset = unshare_expr (DR_OFFSET (dr)); 150 tree init = unshare_expr (DR_INIT (dr)); 151 152 /* Create base_offset */ 153 base_offset = size_binop (PLUS_EXPR, base_offset, init); 154 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off"); 155 add_referenced_tmp_var (dest); 156 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest); 157 append_to_statement_list_force (new_stmt, new_stmt_list); 158 159 if (offset) 160 { 161 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset"); 162 add_referenced_tmp_var (tmp); 163 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, 164 DR_STEP (dr)); 165 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset), 166 base_offset, offset); 167 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp); 168 append_to_statement_list_force (new_stmt, new_stmt_list); 169 } 170 171 /* base + base_offset */ 172 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base, 173 base_offset); 174 175 /* addr_expr = addr_base */ 176 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var, 177 get_name (base_name)); 178 add_referenced_tmp_var (addr_expr); 179 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base); 180 new_temp = make_ssa_name (addr_expr, vec_stmt); 181 TREE_OPERAND (vec_stmt, 0) = new_temp; 182 append_to_statement_list_force (vec_stmt, 
new_stmt_list); 183 184 if (vect_print_dump_info (REPORT_DETAILS)) 185 { 186 fprintf (vect_dump, "created "); 187 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 188 } 189 return new_temp; 190} 191 192 193/* Function vect_align_data_ref. 194 195 Handle misalignment of a memory accesses. 196 197 FORNOW: Can't handle misaligned accesses. 198 Make sure that the dataref is aligned. */ 199 200static void 201vect_align_data_ref (tree stmt) 202{ 203 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 204 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 205 206 /* FORNOW: can't handle misaligned accesses; 207 all accesses expected to be aligned. */ 208 gcc_assert (aligned_access_p (dr)); 209} 210 211 212/* Function vect_create_data_ref_ptr. 213 214 Create a memory reference expression for vector access, to be used in a 215 vector load/store stmt. The reference is based on a new pointer to vector 216 type (vp). 217 218 Input: 219 1. STMT: a stmt that references memory. Expected to be of the form 220 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. 221 2. BSI: block_stmt_iterator where new stmts can be added. 222 3. OFFSET (optional): an offset to be added to the initial address accessed 223 by the data-ref in STMT. 224 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain 225 pointing to the initial address. 226 227 Output: 228 1. Declare a new ptr to vector_type, and have it point to the base of the 229 data reference (initial addressed accessed by the data reference). 230 For example, for vector of type V8HI, the following code is generated: 231 232 v8hi *vp; 233 vp = (v8hi *)initial_address; 234 235 if OFFSET is not supplied: 236 initial_address = &a[init]; 237 if OFFSET is supplied: 238 initial_address = &a[init + OFFSET]; 239 240 Return the initial_address in INITIAL_ADDRESS. 241 242 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create 243 a data-reference in the loop based on the new vector pointer vp. 
This 244 new data reference will by some means be updated each iteration of 245 the loop. Return the pointer vp'. 246 247 FORNOW: handle only aligned and consecutive accesses. */ 248 249static tree 250vect_create_data_ref_ptr (tree stmt, 251 block_stmt_iterator *bsi ATTRIBUTE_UNUSED, 252 tree offset, tree *initial_address, bool only_init) 253{ 254 tree base_name; 255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 256 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 257 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 258 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 259 tree vect_ptr_type; 260 tree vect_ptr; 261 tree tag; 262 tree new_temp; 263 tree vec_stmt; 264 tree new_stmt_list = NULL_TREE; 265 edge pe = loop_preheader_edge (loop); 266 basic_block new_bb; 267 tree vect_ptr_init; 268 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 269 270 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr))); 271 272 if (vect_print_dump_info (REPORT_DETAILS)) 273 { 274 tree data_ref_base = base_name; 275 fprintf (vect_dump, "create vector-pointer variable to type: "); 276 print_generic_expr (vect_dump, vectype, TDF_SLIM); 277 if (TREE_CODE (data_ref_base) == VAR_DECL) 278 fprintf (vect_dump, " vectorizing a one dimensional array ref: "); 279 else if (TREE_CODE (data_ref_base) == ARRAY_REF) 280 fprintf (vect_dump, " vectorizing a multidimensional array ref: "); 281 else if (TREE_CODE (data_ref_base) == COMPONENT_REF) 282 fprintf (vect_dump, " vectorizing a record based array ref: "); 283 else if (TREE_CODE (data_ref_base) == SSA_NAME) 284 fprintf (vect_dump, " vectorizing a pointer ref: "); 285 print_generic_expr (vect_dump, base_name, TDF_SLIM); 286 } 287 288 /** (1) Create the new vector-pointer variable: **/ 289 290 vect_ptr_type = build_pointer_type (vectype); 291 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 292 get_name (base_name)); 293 add_referenced_tmp_var (vect_ptr); 294 295 296 /** (2) Add aliasing 
information to the new vector-pointer: 297 (The points-to info (DR_PTR_INFO) may be defined later.) **/ 298 299 tag = DR_MEMTAG (dr); 300 gcc_assert (tag); 301 302 /* If tag is a variable (and NOT_A_TAG) than a new type alias 303 tag must be created with tag added to its may alias list. */ 304 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG) 305 new_type_alias (vect_ptr, tag); 306 else 307 var_ann (vect_ptr)->type_mem_tag = tag; 308 309 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr); 310 311 /** (3) Calculate the initial address the vector-pointer, and set 312 the vector-pointer to point to it before the loop: **/ 313 314 /* Create: (&(base[init_val+offset]) in the loop preheader. */ 315 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, 316 offset); 317 pe = loop_preheader_edge (loop); 318 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); 319 gcc_assert (!new_bb); 320 *initial_address = new_temp; 321 322 /* Create: p = (vectype *) initial_base */ 323 vec_stmt = fold_convert (vect_ptr_type, new_temp); 324 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); 325 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); 326 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init; 327 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); 328 gcc_assert (!new_bb); 329 330 331 /** (4) Handle the updating of the vector-pointer inside the loop: **/ 332 333 if (only_init) /* No update in loop is required. */ 334 { 335 /* Copy the points-to information if it exists. 
*/ 336 if (DR_PTR_INFO (dr)) 337 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); 338 return vect_ptr_init; 339 } 340 else 341 { 342 block_stmt_iterator incr_bsi; 343 bool insert_after; 344 tree indx_before_incr, indx_after_incr; 345 tree incr; 346 347 standard_iv_increment_position (loop, &incr_bsi, &insert_after); 348 create_iv (vect_ptr_init, 349 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)), 350 NULL_TREE, loop, &incr_bsi, insert_after, 351 &indx_before_incr, &indx_after_incr); 352 incr = bsi_stmt (incr_bsi); 353 set_stmt_info ((tree_ann_t)stmt_ann (incr), 354 new_stmt_vec_info (incr, loop_vinfo)); 355 356 /* Copy the points-to information if it exists. */ 357 if (DR_PTR_INFO (dr)) 358 { 359 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 360 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 361 } 362 merge_alias_info (vect_ptr_init, indx_before_incr); 363 merge_alias_info (vect_ptr_init, indx_after_incr); 364 365 return indx_before_incr; 366 } 367} 368 369 370/* Function vect_create_destination_var. 371 372 Create a new temporary of type VECTYPE. */ 373 374static tree 375vect_create_destination_var (tree scalar_dest, tree vectype) 376{ 377 tree vec_dest; 378 const char *new_name; 379 tree type; 380 enum vect_var_kind kind; 381 382 kind = vectype ? vect_simple_var : vect_scalar_var; 383 type = vectype ? vectype : TREE_TYPE (scalar_dest); 384 385 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); 386 387 new_name = get_name (scalar_dest); 388 if (!new_name) 389 new_name = "var_"; 390 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name); 391 add_referenced_tmp_var (vec_dest); 392 393 return vec_dest; 394} 395 396 397/* Function vect_init_vector. 398 399 Insert a new stmt (INIT_STMT) that initializes a new vector variable with 400 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be 401 used in the vectorization of STMT. 
*/

/* The initialization is placed on the preheader edge of the loop that
   contains STMT; the insertion is asserted not to create a new basic
   block.  The new variable has the vector type recorded for STMT.  */

static tree
vect_init_vector (tree stmt, tree vector_var)
{
  stmt_vec_info vinfo = vinfo_for_stmt (stmt);
  struct loop *loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (vinfo));
  tree vectype = STMT_VINFO_VECTYPE (vinfo);
  tree init_var;
  tree init_stmt;
  tree init_def;
  basic_block split_bb;

  /* A fresh vector temporary to hold the initial value.  */
  init_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
  add_referenced_tmp_var (init_var);

  /* Build 'init_def = VECTOR_VAR', where init_def is an SSA name of the
     temporary.  */
  init_stmt = build2 (MODIFY_EXPR, vectype, init_var, vector_var);
  init_def = make_ssa_name (init_var, init_stmt);
  TREE_OPERAND (init_stmt, 0) = init_def;

  split_bb = bsi_insert_on_edge_immediate (loop_preheader_edge (loop),
                                           init_stmt);
  gcc_assert (!split_bb);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  /* The def of INIT_STMT is its left-hand side.  */
  return TREE_OPERAND (init_stmt, 0);
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.
*/ 449 450static tree 451vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) 452{ 453 tree vec_oprnd; 454 tree vec_stmt; 455 tree def_stmt; 456 stmt_vec_info def_stmt_info = NULL; 457 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 458 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 459 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 460 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 462 tree vec_inv; 463 tree vec_cst; 464 tree t = NULL_TREE; 465 tree def; 466 int i; 467 enum vect_def_type dt; 468 bool is_simple_use; 469 470 if (vect_print_dump_info (REPORT_DETAILS)) 471 { 472 fprintf (vect_dump, "vect_get_vec_def_for_operand: "); 473 print_generic_expr (vect_dump, op, TDF_SLIM); 474 } 475 476 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 477 gcc_assert (is_simple_use); 478 if (vect_print_dump_info (REPORT_DETAILS)) 479 { 480 if (def) 481 { 482 fprintf (vect_dump, "def = "); 483 print_generic_expr (vect_dump, def, TDF_SLIM); 484 } 485 if (def_stmt) 486 { 487 fprintf (vect_dump, " def_stmt = "); 488 print_generic_expr (vect_dump, def_stmt, TDF_SLIM); 489 } 490 } 491 492 switch (dt) 493 { 494 /* Case 1: operand is a constant. */ 495 case vect_constant_def: 496 { 497 if (scalar_def) 498 *scalar_def = op; 499 500 /* Create 'vect_cst_ = {cst,cst,...,cst}' */ 501 if (vect_print_dump_info (REPORT_DETAILS)) 502 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); 503 504 for (i = nunits - 1; i >= 0; --i) 505 { 506 t = tree_cons (NULL_TREE, op, t); 507 } 508 vec_cst = build_vector (vectype, t); 509 return vect_init_vector (stmt, vec_cst); 510 } 511 512 /* Case 2: operand is defined outside the loop - loop invariant. 
*/ 513 case vect_invariant_def: 514 { 515 if (scalar_def) 516 *scalar_def = def; 517 518 /* Create 'vec_inv = {inv,inv,..,inv}' */ 519 if (vect_print_dump_info (REPORT_DETAILS)) 520 fprintf (vect_dump, "Create vector_inv."); 521 522 for (i = nunits - 1; i >= 0; --i) 523 { 524 t = tree_cons (NULL_TREE, def, t); 525 } 526 527 /* FIXME: use build_constructor directly. */ 528 vec_inv = build_constructor_from_list (vectype, t); 529 return vect_init_vector (stmt, vec_inv); 530 } 531 532 /* Case 3: operand is defined inside the loop. */ 533 case vect_loop_def: 534 { 535 if (scalar_def) 536 *scalar_def = def_stmt; 537 538 /* Get the def from the vectorized stmt. */ 539 def_stmt_info = vinfo_for_stmt (def_stmt); 540 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 541 gcc_assert (vec_stmt); 542 vec_oprnd = TREE_OPERAND (vec_stmt, 0); 543 return vec_oprnd; 544 } 545 546 /* Case 4: operand is defined by a loop header phi - reduction */ 547 case vect_reduction_def: 548 { 549 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); 550 551 /* Get the def before the loop */ 552 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 553 return get_initial_def_for_reduction (stmt, op, scalar_def); 554 } 555 556 /* Case 5: operand is defined by loop-header phi - induction. */ 557 case vect_induction_def: 558 { 559 if (vect_print_dump_info (REPORT_DETAILS)) 560 fprintf (vect_dump, "induction - unsupported."); 561 internal_error ("no support for induction"); /* FORNOW */ 562 } 563 564 default: 565 gcc_unreachable (); 566 } 567} 568 569 570/* Function vect_finish_stmt_generation. 571 572 Insert a new stmt. 
*/ 573 574static void 575vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) 576{ 577 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); 578 579 if (vect_print_dump_info (REPORT_DETAILS)) 580 { 581 fprintf (vect_dump, "add new stmt: "); 582 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 583 } 584 585 /* Make sure bsi points to the stmt that is being vectorized. */ 586 gcc_assert (stmt == bsi_stmt (*bsi)); 587 588#ifdef USE_MAPPED_LOCATION 589 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); 590#else 591 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt)); 592#endif 593} 594 595 596#define ADJUST_IN_EPILOG 1 597 598/* Function get_initial_def_for_reduction 599 600 Input: 601 STMT - a stmt that performs a reduction operation in the loop. 602 INIT_VAL - the initial value of the reduction variable 603 604 Output: 605 SCALAR_DEF - a tree that holds a value to be added to the final result 606 of the reduction (used for "ADJUST_IN_EPILOG" - see below). 607 Return a vector variable, initialized according to the operation that STMT 608 performs. This vector will be used as the initial value of the 609 vector of partial results. 610 611 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows: 612 add: [0,0,...,0,0] 613 mult: [1,1,...,1,1] 614 min/max: [init_val,init_val,..,init_val,init_val] 615 bit and/or: [init_val,init_val,..,init_val,init_val] 616 and when necessary (e.g. add/mult case) let the caller know 617 that it needs to adjust the result by init_val. 618 619 Option2: Initialize the vector as follows: 620 add: [0,0,...,0,init_val] 621 mult: [1,1,...,1,init_val] 622 min/max: [init_val,init_val,...,init_val] 623 bit and/or: [init_val,init_val,...,init_val] 624 and no adjustments are needed. 625 626 For example, for the following code: 627 628 s = init_val; 629 for (i=0;i<n;i++) 630 s = s + a[i]; 631 632 STMT is 's = s + a[i]', and the reduction variable is 's'. 
633 For a vector of 4 units, we want to return either [0,0,0,init_val], 634 or [0,0,0,0] and let the caller know that it needs to adjust 635 the result at the end by 'init_val'. 636 637 FORNOW: We use the "ADJUST_IN_EPILOG" scheme. 638 TODO: Use some cost-model to estimate which scheme is more profitable. 639*/ 640 641static tree 642get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def) 643{ 644 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 645 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 646 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); 647 int nelements; 648 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); 649 tree type = TREE_TYPE (init_val); 650 tree def; 651 tree vec, t = NULL_TREE; 652 bool need_epilog_adjust; 653 int i; 654 655 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); 656 657 switch (code) 658 { 659 case PLUS_EXPR: 660 if (INTEGRAL_TYPE_P (type)) 661 def = build_int_cst (type, 0); 662 else 663 def = build_real (type, dconst0); 664 665#ifdef ADJUST_IN_EPILOG 666 /* All the 'nunits' elements are set to 0. The final result will be 667 adjusted by 'init_val' at the loop epilog. */ 668 nelements = nunits; 669 need_epilog_adjust = true; 670#else 671 /* 'nunits - 1' elements are set to 0; The last element is set to 672 'init_val'. No further adjustments at the epilog are needed. */ 673 nelements = nunits - 1; 674 need_epilog_adjust = false; 675#endif 676 break; 677 678 case MIN_EXPR: 679 case MAX_EXPR: 680 def = init_val; 681 nelements = nunits; 682 need_epilog_adjust = false; 683 break; 684 685 default: 686 gcc_unreachable (); 687 } 688 689 for (i = nelements - 1; i >= 0; --i) 690 t = tree_cons (NULL_TREE, def, t); 691 692 if (nelements == nunits - 1) 693 { 694 /* Set the last element of the vector. 
*/ 695 t = tree_cons (NULL_TREE, init_val, t); 696 nelements += 1; 697 } 698 gcc_assert (nelements == nunits); 699 700 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST) 701 vec = build_vector (vectype, t); 702 else 703 vec = build_constructor_from_list (vectype, t); 704 705 if (!need_epilog_adjust) 706 *scalar_def = NULL_TREE; 707 else 708 *scalar_def = init_val; 709 710 return vect_init_vector (stmt, vec); 711} 712 713 714/* Function vect_create_epilog_for_reduction: 715 716 Create code at the loop-epilog to finalize the result of a reduction 717 computation. 718 719 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it 720 into a single result, by applying the operation REDUC_CODE on the 721 partial-results-vector. For this, we need to create a new phi node at the 722 loop exit to preserve loop-closed form, as illustrated below. 723 724 STMT is the original scalar reduction stmt that is being vectorized. 725 REDUCTION_OP is the scalar reduction-variable. 726 REDUCTION_PHI is the phi-node that carries the reduction computation. 727 This function also sets the arguments for the REDUCTION_PHI: 728 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP. 729 The loop-latch argument is VECT_DEF - the vector of partial sums. 730 731 This function transforms this: 732 733 loop: 734 vec_def = phi <null, null> # REDUCTION_PHI 735 .... 736 VECT_DEF = ... 737 738 loop_exit: 739 s_out0 = phi <s_loop> # EXIT_PHI 740 741 use <s_out0> 742 use <s_out0> 743 744 Into: 745 746 loop: 747 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI 748 .... 749 VECT_DEF = ... 
750 751 loop_exit: 752 s_out0 = phi <s_loop> # EXIT_PHI 753 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI 754 755 v_out2 = reduc_expr <v_out1> 756 s_out3 = extract_field <v_out2, 0> 757 758 use <s_out3> 759 use <s_out3> 760*/ 761 762static void 763vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op, 764 enum tree_code reduc_code, tree reduction_phi) 765{ 766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 767 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 768 enum machine_mode mode = TYPE_MODE (vectype); 769 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 771 basic_block exit_bb; 772 tree scalar_dest = TREE_OPERAND (stmt, 0); 773 tree scalar_type = TREE_TYPE (scalar_dest); 774 tree new_phi; 775 block_stmt_iterator exit_bsi; 776 tree vec_dest; 777 tree new_temp; 778 tree new_name; 779 tree epilog_stmt; 780 tree new_scalar_dest, exit_phi; 781 tree bitsize, bitpos, bytesize; 782 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); 783 tree scalar_initial_def; 784 tree vec_initial_def; 785 tree orig_name; 786 imm_use_iterator imm_iter; 787 use_operand_p use_p; 788 bool extract_scalar_result; 789 790 /*** 1. Create the reduction def-use cycle ***/ 791 792 /* 1.1 set the loop-entry arg of the reduction-phi: */ 793 /* For the case of reduction, vect_get_vec_def_for_operand returns 794 the scalar def before the loop, that defines the initial value 795 of the reduction variable. 
*/ 796 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, 797 &scalar_initial_def); 798 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); 799 800 801 /* 1.2 set the loop-latch arg for the reduction-phi: */ 802 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); 803 804 if (vect_print_dump_info (REPORT_DETAILS)) 805 { 806 fprintf (vect_dump, "transform reduction: created def-use cycle:"); 807 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM); 808 fprintf (vect_dump, "\n"); 809 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM); 810 } 811 812 813 /*** 2. Create epilog code ***/ 814 815 /* 2.1 Create new loop-exit-phi to preserve loop-closed form: 816 v_out1 = phi <v_loop> */ 817 818 exit_bb = loop->single_exit->dest; 819 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); 820 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); 821 822 exit_bsi = bsi_start (exit_bb); 823 824 825 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); 826 bitsize = TYPE_SIZE (scalar_type); 827 bytesize = TYPE_SIZE_UNIT (scalar_type); 828 829 /* 2.2 Create the reduction code. 
*/ 830 831 if (reduc_code < NUM_TREE_CODES) 832 { 833 /*** Case 1: Create: 834 v_out2 = reduc_expr <v_out1> */ 835 836 if (vect_print_dump_info (REPORT_DETAILS)) 837 fprintf (vect_dump, "Reduce using direct vector reduction."); 838 839 vec_dest = vect_create_destination_var (scalar_dest, vectype); 840 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 841 build1 (reduc_code, vectype, PHI_RESULT (new_phi))); 842 new_temp = make_ssa_name (vec_dest, epilog_stmt); 843 TREE_OPERAND (epilog_stmt, 0) = new_temp; 844 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 845 846 extract_scalar_result = true; 847 } 848 else 849 { 850 enum tree_code shift_code = 0; 851 bool have_whole_vector_shift = true; 852 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */ 853 int bit_offset; 854 int element_bitsize = tree_low_cst (bitsize, 1); 855 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); 856 tree vec_temp; 857 858 /* The result of the reduction is expected to be at the least 859 significant bits of the vector. This is merely convention, 860 as it's the extraction later that really matters, and that 861 is also under our control. */ 862 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing) 863 shift_code = VEC_RSHIFT_EXPR; 864 else 865 have_whole_vector_shift = false; 866 867 /* Regardless of whether we have a whole vector shift, if we're 868 emulating the operation via tree-vect-generic, we don't want 869 to use it. Only the first round of the reduction is likely 870 to still be profitable via emulation. */ 871 /* ??? It might be better to emit a reduction tree code here, so that 872 tree-vect-generic can expand the first round via bit tricks. 
*/ 873 if (!VECTOR_MODE_P (mode)) 874 have_whole_vector_shift = false; 875 else 876 { 877 optab optab = optab_for_tree_code (code, vectype); 878 if (optab->handlers[mode].insn_code == CODE_FOR_nothing) 879 have_whole_vector_shift = false; 880 } 881 882 if (have_whole_vector_shift) 883 { 884 /*** Case 2: 885 for (offset = VS/2; offset >= element_size; offset/=2) 886 { 887 Create: va' = vec_shift <va, offset> 888 Create: va = vop <va, va'> 889 } */ 890 891 if (vect_print_dump_info (REPORT_DETAILS)) 892 fprintf (vect_dump, "Reduce using vector shifts"); 893 894 vec_dest = vect_create_destination_var (scalar_dest, vectype); 895 new_temp = PHI_RESULT (new_phi); 896 897 for (bit_offset = vec_size_in_bits/2; 898 bit_offset >= element_bitsize; 899 bit_offset /= 2) 900 { 901 tree bitpos = size_int (bit_offset); 902 903 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 904 build2 (shift_code, vectype, new_temp, bitpos)); 905 new_name = make_ssa_name (vec_dest, epilog_stmt); 906 TREE_OPERAND (epilog_stmt, 0) = new_name; 907 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 908 if (vect_print_dump_info (REPORT_DETAILS)) 909 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 910 911 912 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 913 build2 (code, vectype, new_name, new_temp)); 914 new_temp = make_ssa_name (vec_dest, epilog_stmt); 915 TREE_OPERAND (epilog_stmt, 0) = new_temp; 916 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 917 if (vect_print_dump_info (REPORT_DETAILS)) 918 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 919 } 920 921 extract_scalar_result = true; 922 } 923 else 924 { 925 tree rhs; 926 927 /*** Case 3: 928 Create: 929 s = extract_field <v_out2, 0> 930 for (offset=element_size; offset<vector_size; offset+=element_size;) 931 { 932 Create: s' = extract_field <v_out2, offset> 933 Create: s = op <s, s'> 934 } */ 935 936 if (vect_print_dump_info (REPORT_DETAILS)) 937 fprintf (vect_dump, "Reduce using scalar code. 
"); 938 939 vec_temp = PHI_RESULT (new_phi); 940 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); 941 942 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 943 bitsize_zero_node); 944 945 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 946 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 947 rhs); 948 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 949 TREE_OPERAND (epilog_stmt, 0) = new_temp; 950 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 951 if (vect_print_dump_info (REPORT_DETAILS)) 952 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 953 954 for (bit_offset = element_bitsize; 955 bit_offset < vec_size_in_bits; 956 bit_offset += element_bitsize) 957 { 958 tree bitpos = bitsize_int (bit_offset); 959 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 960 bitpos); 961 962 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 963 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 964 rhs); 965 new_name = make_ssa_name (new_scalar_dest, epilog_stmt); 966 TREE_OPERAND (epilog_stmt, 0) = new_name; 967 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 968 if (vect_print_dump_info (REPORT_DETAILS)) 969 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 970 971 972 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 973 build2 (code, scalar_type, new_name, new_temp)); 974 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 975 TREE_OPERAND (epilog_stmt, 0) = new_temp; 976 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 977 if (vect_print_dump_info (REPORT_DETAILS)) 978 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 979 } 980 981 extract_scalar_result = false; 982 } 983 } 984 985 986 /* 2.3 Extract the final scalar result. 
Create: 987 s_out3 = extract_field <v_out2, bitpos> */ 988 989 if (extract_scalar_result) 990 { 991 tree rhs; 992 993 if (vect_print_dump_info (REPORT_DETAILS)) 994 fprintf (vect_dump, "extract scalar result"); 995 996 /* The result is in the low order bits. */ 997 if (BYTES_BIG_ENDIAN) 998 bitpos = size_binop (MULT_EXPR, 999 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), 1000 TYPE_SIZE (scalar_type)); 1001 else 1002 bitpos = bitsize_zero_node; 1003 1004 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos); 1005 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 1006 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs); 1007 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1008 TREE_OPERAND (epilog_stmt, 0) = new_temp; 1009 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1010 if (vect_print_dump_info (REPORT_DETAILS)) 1011 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 1012 } 1013 1014 1015 /* 2.4 Adjust the final result by the initial value of the reduction 1016 variable. (when such adjustment is not needed, then 1017 'scalar_initial_def' is zero). 1018 1019 Create: 1020 s_out = scalar_expr <s_out, scalar_initial_def> */ 1021 1022 if (scalar_initial_def) 1023 { 1024 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1025 build2 (code, scalar_type, new_temp, scalar_initial_def)); 1026 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1027 TREE_OPERAND (epilog_stmt, 0) = new_temp; 1028 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1029 1030 if (vect_print_dump_info (REPORT_DETAILS)) 1031 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); 1032 } 1033 1034 1035 /* 2.5 Replace uses of s_out0 with uses of s_out3 */ 1036 1037 /* Find the loop-closed-use at the loop exit of the original 1038 scalar result. (The reduction result is expected to have 1039 two immediate uses - one at the latch block, and one at the 1040 loop exit). 
*/ 1041 exit_phi = NULL; 1042 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) 1043 { 1044 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p)))) 1045 { 1046 exit_phi = USE_STMT (use_p); 1047 break; 1048 } 1049 } 1050 1051 orig_name = PHI_RESULT (exit_phi); 1052 1053 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name) 1054 SET_USE (use_p, new_temp); 1055} 1056 1057 1058/* Function vectorizable_reduction. 1059 1060 Check if STMT performs a reduction operation that can be vectorized. 1061 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1062 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1063 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1064 1065bool 1066vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1067{ 1068 tree vec_dest; 1069 tree scalar_dest; 1070 tree op0, op1; 1071 tree loop_vec_def; 1072 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1073 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1074 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1075 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1076 tree operation; 1077 enum tree_code code, reduc_code = 0; 1078 enum machine_mode vec_mode; 1079 int op_type; 1080 optab optab, reduc_optab; 1081 tree new_temp; 1082 tree def0, def1, def_stmt0, def_stmt1; 1083 enum vect_def_type dt0, dt1; 1084 tree new_phi; 1085 tree scalar_type; 1086 bool is_simple_use0; 1087 bool is_simple_use1; 1088 1089 /* Is vectorizable reduction? */ 1090 1091 /* Not supportable if the reduction variable is used in the loop. */ 1092 if (STMT_VINFO_RELEVANT_P (stmt_info)) 1093 return false; 1094 1095 if (!STMT_VINFO_LIVE_P (stmt_info)) 1096 return false; 1097 1098 /* Make sure it was already recognized as a reduction pattern. 
*/ 1099 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) 1100 return false; 1101 1102 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); 1103 1104 operation = TREE_OPERAND (stmt, 1); 1105 code = TREE_CODE (operation); 1106 op_type = TREE_CODE_LENGTH (code); 1107 1108 if (op_type != binary_op) 1109 return false; 1110 1111 op0 = TREE_OPERAND (operation, 0); 1112 op1 = TREE_OPERAND (operation, 1); 1113 scalar_dest = TREE_OPERAND (stmt, 0); 1114 scalar_type = TREE_TYPE (scalar_dest); 1115 1116 /* Check the first operand. It is expected to be defined inside the loop. */ 1117 is_simple_use0 = 1118 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0); 1119 is_simple_use1 = 1120 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1); 1121 1122 gcc_assert (is_simple_use0); 1123 gcc_assert (is_simple_use1); 1124 gcc_assert (dt0 == vect_loop_def); 1125 gcc_assert (dt1 == vect_reduction_def); 1126 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE); 1127 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1)); 1128 1129 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1))) 1130 return false; 1131 1132 /* Supportable by target? */ 1133 1134 /* check support for the operation in the loop */ 1135 optab = optab_for_tree_code (code, vectype); 1136 if (!optab) 1137 { 1138 if (vect_print_dump_info (REPORT_DETAILS)) 1139 fprintf (vect_dump, "no optab."); 1140 return false; 1141 } 1142 vec_mode = TYPE_MODE (vectype); 1143 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) 1144 { 1145 if (vect_print_dump_info (REPORT_DETAILS)) 1146 fprintf (vect_dump, "op not supported by target."); 1147 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 1148 || LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1149 < vect_min_worthwhile_factor (code)) 1150 return false; 1151 if (vect_print_dump_info (REPORT_DETAILS)) 1152 fprintf (vect_dump, "proceeding using word mode."); 1153 } 1154 1155 /* Worthwhile without SIMD support? 
*/ 1156 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1157 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1158 < vect_min_worthwhile_factor (code)) 1159 { 1160 if (vect_print_dump_info (REPORT_DETAILS)) 1161 fprintf (vect_dump, "not worthwhile without SIMD support."); 1162 return false; 1163 } 1164 1165 /* check support for the epilog operation */ 1166 if (!reduction_code_for_scalar_code (code, &reduc_code)) 1167 return false; 1168 reduc_optab = optab_for_tree_code (reduc_code, vectype); 1169 if (!reduc_optab) 1170 { 1171 if (vect_print_dump_info (REPORT_DETAILS)) 1172 fprintf (vect_dump, "no optab for reduction."); 1173 reduc_code = NUM_TREE_CODES; 1174 } 1175 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) 1176 { 1177 if (vect_print_dump_info (REPORT_DETAILS)) 1178 fprintf (vect_dump, "reduc op not supported by target."); 1179 reduc_code = NUM_TREE_CODES; 1180 } 1181 1182 if (!vec_stmt) /* transformation not required. */ 1183 { 1184 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; 1185 return true; 1186 } 1187 1188 /** Transform. **/ 1189 1190 if (vect_print_dump_info (REPORT_DETAILS)) 1191 fprintf (vect_dump, "transform reduction."); 1192 1193 /* Create the destination vector */ 1194 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1195 1196 1197 /* Create the reduction-phi that defines the reduction-operand. 
*/ 1198 new_phi = create_phi_node (vec_dest, loop->header); 1199 1200 1201 /* Prepare the operand that is defined inside the loop body */ 1202 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL); 1203 1204 /* Create the vectorized operation that computes the partial results */ 1205 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1206 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi))); 1207 new_temp = make_ssa_name (vec_dest, *vec_stmt); 1208 TREE_OPERAND (*vec_stmt, 0) = new_temp; 1209 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1210 1211 1212 /* Finalize the reduction-phi (set it's arguments) and create the 1213 epilog reduction code. */ 1214 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi); 1215 return true; 1216} 1217 1218 1219/* Function vectorizable_assignment. 1220 1221 Check if STMT performs an assignment (copy) that can be vectorized. 1222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1223 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1225 1226bool 1227vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1228{ 1229 tree vec_dest; 1230 tree scalar_dest; 1231 tree op; 1232 tree vec_oprnd; 1233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1234 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1236 tree new_temp; 1237 tree def, def_stmt; 1238 enum vect_def_type dt; 1239 1240 /* Is vectorizable assignment? 
*/ 1241 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1242 return false; 1243 1244 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1245 1246 if (TREE_CODE (stmt) != MODIFY_EXPR) 1247 return false; 1248 1249 scalar_dest = TREE_OPERAND (stmt, 0); 1250 if (TREE_CODE (scalar_dest) != SSA_NAME) 1251 return false; 1252 1253 op = TREE_OPERAND (stmt, 1); 1254 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1255 { 1256 if (vect_print_dump_info (REPORT_DETAILS)) 1257 fprintf (vect_dump, "use not simple."); 1258 return false; 1259 } 1260 1261 if (!vec_stmt) /* transformation not required. */ 1262 { 1263 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 1264 return true; 1265 } 1266 1267 /** Transform. **/ 1268 if (vect_print_dump_info (REPORT_DETAILS)) 1269 fprintf (vect_dump, "transform assignment."); 1270 1271 /* Handle def. */ 1272 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1273 1274 /* Handle use. */ 1275 op = TREE_OPERAND (stmt, 1); 1276 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); 1277 1278 /* Arguments are ready. create the new vector stmt. */ 1279 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); 1280 new_temp = make_ssa_name (vec_dest, *vec_stmt); 1281 TREE_OPERAND (*vec_stmt, 0) = new_temp; 1282 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1283 1284 return true; 1285} 1286 1287 1288/* Function vect_min_worthwhile_factor. 1289 1290 For a loop where we could vectorize the operation indicated by CODE, 1291 return the minimum vectorization factor that makes it worthwhile 1292 to use generic vectors. 
*/ 1293static int 1294vect_min_worthwhile_factor (enum tree_code code) 1295{ 1296 switch (code) 1297 { 1298 case PLUS_EXPR: 1299 case MINUS_EXPR: 1300 case NEGATE_EXPR: 1301 return 4; 1302 1303 case BIT_AND_EXPR: 1304 case BIT_IOR_EXPR: 1305 case BIT_XOR_EXPR: 1306 case BIT_NOT_EXPR: 1307 return 2; 1308 1309 default: 1310 return INT_MAX; 1311 } 1312} 1313 1314 1315/* Function vectorizable_operation. 1316 1317 Check if STMT performs a binary or unary operation that can be vectorized. 1318 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1319 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1320 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1321 1322bool 1323vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1324{ 1325 tree vec_dest; 1326 tree scalar_dest; 1327 tree operation; 1328 tree op0, op1 = NULL; 1329 tree vec_oprnd0, vec_oprnd1=NULL; 1330 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1331 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1332 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1333 int i; 1334 enum tree_code code; 1335 enum machine_mode vec_mode; 1336 tree new_temp; 1337 int op_type; 1338 tree op; 1339 optab optab; 1340 int icode; 1341 enum machine_mode optab_op2_mode; 1342 tree def, def_stmt; 1343 enum vect_def_type dt; 1344 1345 /* Is STMT a vectorizable binary/unary operation? */ 1346 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1347 return false; 1348 1349 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1350 1351 if (STMT_VINFO_LIVE_P (stmt_info)) 1352 { 1353 /* FORNOW: not yet supported. 
*/ 1354 if (vect_print_dump_info (REPORT_DETAILS)) 1355 fprintf (vect_dump, "value used after loop."); 1356 return false; 1357 } 1358 1359 if (TREE_CODE (stmt) != MODIFY_EXPR) 1360 return false; 1361 1362 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) 1363 return false; 1364 1365 operation = TREE_OPERAND (stmt, 1); 1366 code = TREE_CODE (operation); 1367 optab = optab_for_tree_code (code, vectype); 1368 1369 /* Support only unary or binary operations. */ 1370 op_type = TREE_CODE_LENGTH (code); 1371 if (op_type != unary_op && op_type != binary_op) 1372 { 1373 if (vect_print_dump_info (REPORT_DETAILS)) 1374 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type); 1375 return false; 1376 } 1377 1378 for (i = 0; i < op_type; i++) 1379 { 1380 op = TREE_OPERAND (operation, i); 1381 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1382 { 1383 if (vect_print_dump_info (REPORT_DETAILS)) 1384 fprintf (vect_dump, "use not simple."); 1385 return false; 1386 } 1387 } 1388 1389 /* Supportable by target? */ 1390 if (!optab) 1391 { 1392 if (vect_print_dump_info (REPORT_DETAILS)) 1393 fprintf (vect_dump, "no optab."); 1394 return false; 1395 } 1396 vec_mode = TYPE_MODE (vectype); 1397 icode = (int) optab->handlers[(int) vec_mode].insn_code; 1398 if (icode == CODE_FOR_nothing) 1399 { 1400 if (vect_print_dump_info (REPORT_DETAILS)) 1401 fprintf (vect_dump, "op not supported by target."); 1402 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 1403 || LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1404 < vect_min_worthwhile_factor (code)) 1405 return false; 1406 if (vect_print_dump_info (REPORT_DETAILS)) 1407 fprintf (vect_dump, "proceeding using word mode."); 1408 } 1409 1410 /* Worthwhile without SIMD support? 
*/ 1411 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1412 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1413 < vect_min_worthwhile_factor (code)) 1414 { 1415 if (vect_print_dump_info (REPORT_DETAILS)) 1416 fprintf (vect_dump, "not worthwhile without SIMD support."); 1417 return false; 1418 } 1419 1420 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) 1421 { 1422 /* FORNOW: not yet supported. */ 1423 if (!VECTOR_MODE_P (vec_mode)) 1424 return false; 1425 1426 /* Invariant argument is needed for a vector shift 1427 by a scalar shift operand. */ 1428 optab_op2_mode = insn_data[icode].operand[2].mode; 1429 if (! (VECTOR_MODE_P (optab_op2_mode) 1430 || dt == vect_constant_def 1431 || dt == vect_invariant_def)) 1432 { 1433 if (vect_print_dump_info (REPORT_DETAILS)) 1434 fprintf (vect_dump, "operand mode requires invariant argument."); 1435 return false; 1436 } 1437 } 1438 1439 if (!vec_stmt) /* transformation not required. */ 1440 { 1441 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 1442 return true; 1443 } 1444 1445 /** Transform. **/ 1446 1447 if (vect_print_dump_info (REPORT_DETAILS)) 1448 fprintf (vect_dump, "transform binary/unary operation."); 1449 1450 /* Handle def. */ 1451 scalar_dest = TREE_OPERAND (stmt, 0); 1452 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1453 1454 /* Handle uses. */ 1455 op0 = TREE_OPERAND (operation, 0); 1456 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 1457 1458 if (op_type == binary_op) 1459 { 1460 op1 = TREE_OPERAND (operation, 1); 1461 1462 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) 1463 { 1464 /* Vector shl and shr insn patterns can be defined with 1465 scalar operand 2 (shift operand). In this case, use 1466 constant or loop invariant op1 directly, without 1467 extending it to vector mode first. 
*/ 1468 1469 optab_op2_mode = insn_data[icode].operand[2].mode; 1470 if (!VECTOR_MODE_P (optab_op2_mode)) 1471 { 1472 if (vect_print_dump_info (REPORT_DETAILS)) 1473 fprintf (vect_dump, "operand 1 using scalar mode."); 1474 vec_oprnd1 = op1; 1475 } 1476 } 1477 1478 if (!vec_oprnd1) 1479 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); 1480 } 1481 1482 /* Arguments are ready. create the new vector stmt. */ 1483 1484 if (op_type == binary_op) 1485 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1486 build2 (code, vectype, vec_oprnd0, vec_oprnd1)); 1487 else 1488 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1489 build1 (code, vectype, vec_oprnd0)); 1490 new_temp = make_ssa_name (vec_dest, *vec_stmt); 1491 TREE_OPERAND (*vec_stmt, 0) = new_temp; 1492 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1493 1494 return true; 1495} 1496 1497 1498/* Function vectorizable_store. 1499 1500 Check if STMT defines a non scalar data-ref (array/pointer/structure) that 1501 can be vectorized. 1502 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1503 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1504 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1505 1506bool 1507vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1508{ 1509 tree scalar_dest; 1510 tree data_ref; 1511 tree op; 1512 tree vec_oprnd1; 1513 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1514 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1515 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1516 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1517 enum machine_mode vec_mode; 1518 tree dummy; 1519 enum dr_alignment_support alignment_support_cheme; 1520 ssa_op_iter iter; 1521 tree def, def_stmt; 1522 enum vect_def_type dt; 1523 1524 /* Is vectorizable store? 
*/ 1525 1526 if (TREE_CODE (stmt) != MODIFY_EXPR) 1527 return false; 1528 1529 scalar_dest = TREE_OPERAND (stmt, 0); 1530 if (TREE_CODE (scalar_dest) != ARRAY_REF 1531 && TREE_CODE (scalar_dest) != INDIRECT_REF) 1532 return false; 1533 1534 op = TREE_OPERAND (stmt, 1); 1535 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1536 { 1537 if (vect_print_dump_info (REPORT_DETAILS)) 1538 fprintf (vect_dump, "use not simple."); 1539 return false; 1540 } 1541 1542 vec_mode = TYPE_MODE (vectype); 1543 /* FORNOW. In some cases can vectorize even if data-type not supported 1544 (e.g. - array initialization with 0). */ 1545 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) 1546 return false; 1547 1548 if (!STMT_VINFO_DATA_REF (stmt_info)) 1549 return false; 1550 1551 1552 if (!vec_stmt) /* transformation not required. */ 1553 { 1554 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 1555 return true; 1556 } 1557 1558 /** Transform. **/ 1559 1560 if (vect_print_dump_info (REPORT_DETAILS)) 1561 fprintf (vect_dump, "transform store"); 1562 1563 alignment_support_cheme = vect_supportable_dr_alignment (dr); 1564 gcc_assert (alignment_support_cheme); 1565 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ 1566 1567 /* Handle use - get the vectorized def from the defining stmt. */ 1568 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL); 1569 1570 /* Handle def. */ 1571 /* FORNOW: make sure the data reference is aligned. */ 1572 vect_align_data_ref (stmt); 1573 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); 1574 data_ref = build_fold_indirect_ref (data_ref); 1575 1576 /* Arguments are ready. create the new vector stmt. 
*/ 1577 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); 1578 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1579 1580 /* Copy the V_MAY_DEFS representing the aliasing of the original array 1581 element's definition to the vector's definition then update the 1582 defining statement. The original is being deleted so the same 1583 SSA_NAMEs can be used. */ 1584 copy_virtual_operands (*vec_stmt, stmt); 1585 1586 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF) 1587 { 1588 SSA_NAME_DEF_STMT (def) = *vec_stmt; 1589 1590 /* If this virtual def has a use outside the loop and a loop peel is 1591 performed then the def may be renamed by the peel. Mark it for 1592 renaming so the later use will also be renamed. */ 1593 mark_sym_for_renaming (SSA_NAME_VAR (def)); 1594 } 1595 1596 return true; 1597} 1598 1599 1600/* vectorizable_load. 1601 1602 Check if STMT reads a non scalar data-ref (array/pointer/structure) that 1603 can be vectorized. 1604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1605 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1607 1608bool 1609vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1610{ 1611 tree scalar_dest; 1612 tree vec_dest = NULL; 1613 tree data_ref = NULL; 1614 tree op; 1615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1616 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1617 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1618 tree new_temp; 1619 int mode; 1620 tree init_addr; 1621 tree new_stmt; 1622 tree dummy; 1623 basic_block new_bb; 1624 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1625 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1626 edge pe = loop_preheader_edge (loop); 1627 enum dr_alignment_support alignment_support_cheme; 1628 1629 /* Is vectorizable load? 
*/ 1630 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1631 return false; 1632 1633 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1634 1635 if (STMT_VINFO_LIVE_P (stmt_info)) 1636 { 1637 /* FORNOW: not yet supported. */ 1638 if (vect_print_dump_info (REPORT_DETAILS)) 1639 fprintf (vect_dump, "value used after loop."); 1640 return false; 1641 } 1642 1643 if (TREE_CODE (stmt) != MODIFY_EXPR) 1644 return false; 1645 1646 scalar_dest = TREE_OPERAND (stmt, 0); 1647 if (TREE_CODE (scalar_dest) != SSA_NAME) 1648 return false; 1649 1650 op = TREE_OPERAND (stmt, 1); 1651 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) 1652 return false; 1653 1654 if (!STMT_VINFO_DATA_REF (stmt_info)) 1655 return false; 1656 1657 mode = (int) TYPE_MODE (vectype); 1658 1659 /* FORNOW. In some cases can vectorize even if data-type not supported 1660 (e.g. - data copies). */ 1661 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) 1662 { 1663 if (vect_print_dump_info (REPORT_DETAILS)) 1664 fprintf (vect_dump, "Aligned load, but unsupported type."); 1665 return false; 1666 } 1667 1668 if (!vec_stmt) /* transformation not required. */ 1669 { 1670 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 1671 return true; 1672 } 1673 1674 /** Transform. 
**/ 1675 1676 if (vect_print_dump_info (REPORT_DETAILS)) 1677 fprintf (vect_dump, "transform load."); 1678 1679 alignment_support_cheme = vect_supportable_dr_alignment (dr); 1680 gcc_assert (alignment_support_cheme); 1681 1682 if (alignment_support_cheme == dr_aligned 1683 || alignment_support_cheme == dr_unaligned_supported) 1684 { 1685 /* Create: 1686 p = initial_addr; 1687 indx = 0; 1688 loop { 1689 vec_dest = *(p); 1690 indx = indx + 1; 1691 } 1692 */ 1693 1694 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1695 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); 1696 if (aligned_access_p (dr)) 1697 data_ref = build_fold_indirect_ref (data_ref); 1698 else 1699 { 1700 int mis = DR_MISALIGNMENT (dr); 1701 tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 1702 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT)); 1703 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); 1704 } 1705 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1706 new_temp = make_ssa_name (vec_dest, new_stmt); 1707 TREE_OPERAND (new_stmt, 0) = new_temp; 1708 vect_finish_stmt_generation (stmt, new_stmt, bsi); 1709 copy_virtual_operands (new_stmt, stmt); 1710 } 1711 else if (alignment_support_cheme == dr_unaligned_software_pipeline) 1712 { 1713 /* Create: 1714 p1 = initial_addr; 1715 msq_init = *(floor(p1)) 1716 p2 = initial_addr + VS - 1; 1717 magic = have_builtin ? 
builtin_result : initial_address; 1718 indx = 0; 1719 loop { 1720 p2' = p2 + indx * vectype_size 1721 lsq = *(floor(p2')) 1722 vec_dest = realign_load (msq, lsq, magic) 1723 indx = indx + 1; 1724 msq = lsq; 1725 } 1726 */ 1727 1728 tree offset; 1729 tree magic; 1730 tree phi_stmt; 1731 tree msq_init; 1732 tree msq, lsq; 1733 tree dataref_ptr; 1734 tree params; 1735 1736 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ 1737 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1738 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, 1739 &init_addr, true); 1740 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); 1741 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1742 new_temp = make_ssa_name (vec_dest, new_stmt); 1743 TREE_OPERAND (new_stmt, 0) = new_temp; 1744 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); 1745 gcc_assert (!new_bb); 1746 msq_init = TREE_OPERAND (new_stmt, 0); 1747 copy_virtual_operands (new_stmt, stmt); 1748 update_vuses_to_preheader (new_stmt, loop); 1749 1750 1751 /* <2> Create lsq = *(floor(p2')) in the loop */ 1752 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 1753 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1754 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); 1755 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 1756 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1757 new_temp = make_ssa_name (vec_dest, new_stmt); 1758 TREE_OPERAND (new_stmt, 0) = new_temp; 1759 vect_finish_stmt_generation (stmt, new_stmt, bsi); 1760 lsq = TREE_OPERAND (new_stmt, 0); 1761 copy_virtual_operands (new_stmt, stmt); 1762 1763 1764 /* <3> */ 1765 if (targetm.vectorize.builtin_mask_for_load) 1766 { 1767 /* Create permutation mask, if required, in loop preheader. 
*/ 1768 tree builtin_decl; 1769 params = build_tree_list (NULL_TREE, init_addr); 1770 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1771 builtin_decl = targetm.vectorize.builtin_mask_for_load (); 1772 new_stmt = build_function_call_expr (builtin_decl, params); 1773 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); 1774 new_temp = make_ssa_name (vec_dest, new_stmt); 1775 TREE_OPERAND (new_stmt, 0) = new_temp; 1776 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); 1777 gcc_assert (!new_bb); 1778 magic = TREE_OPERAND (new_stmt, 0); 1779 1780 /* The result of the CALL_EXPR to this builtin is determined from 1781 the value of the parameter and no global variables are touched 1782 which makes the builtin a "const" function. Requiring the 1783 builtin to have the "const" attribute makes it unnecessary 1784 to call mark_call_clobbered. */ 1785 gcc_assert (TREE_READONLY (builtin_decl)); 1786 } 1787 else 1788 { 1789 /* Use current address instead of init_addr for reduced reg pressure. 
1790 */ 1791 magic = dataref_ptr; 1792 } 1793 1794 1795 /* <4> Create msq = phi <msq_init, lsq> in loop */ 1796 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1797 msq = make_ssa_name (vec_dest, NULL_TREE); 1798 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ 1799 SSA_NAME_DEF_STMT (msq) = phi_stmt; 1800 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop)); 1801 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop)); 1802 1803 1804 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */ 1805 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1806 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); 1807 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); 1808 new_temp = make_ssa_name (vec_dest, new_stmt); 1809 TREE_OPERAND (new_stmt, 0) = new_temp; 1810 vect_finish_stmt_generation (stmt, new_stmt, bsi); 1811 } 1812 else 1813 gcc_unreachable (); 1814 1815 *vec_stmt = new_stmt; 1816 return true; 1817} 1818 1819 1820/* Function vectorizable_live_operation. 1821 1822 STMT computes a value that is used outside the loop. Check if 1823 it can be supported. */ 1824 1825bool 1826vectorizable_live_operation (tree stmt, 1827 block_stmt_iterator *bsi ATTRIBUTE_UNUSED, 1828 tree *vec_stmt ATTRIBUTE_UNUSED) 1829{ 1830 tree operation; 1831 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1832 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1833 int i; 1834 enum tree_code code; 1835 int op_type; 1836 tree op; 1837 tree def, def_stmt; 1838 enum vect_def_type dt; 1839 1840 if (!STMT_VINFO_LIVE_P (stmt_info)) 1841 return false; 1842 1843 if (TREE_CODE (stmt) != MODIFY_EXPR) 1844 return false; 1845 1846 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) 1847 return false; 1848 1849 operation = TREE_OPERAND (stmt, 1); 1850 code = TREE_CODE (operation); 1851 1852 op_type = TREE_CODE_LENGTH (code); 1853 1854 /* FORNOW: support only if all uses are invariant. 
This means 1855 that the scalar operations can remain in place, unvectorized. 1856 The original last scalar value that they compute will be used. */ 1857 1858 for (i = 0; i < op_type; i++) 1859 { 1860 op = TREE_OPERAND (operation, i); 1861 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1862 { 1863 if (vect_print_dump_info (REPORT_DETAILS)) 1864 fprintf (vect_dump, "use not simple."); 1865 return false; 1866 } 1867 1868 if (dt != vect_invariant_def && dt != vect_constant_def) 1869 return false; 1870 } 1871 1872 /* No transformation is required for the cases we currently support. */ 1873 return true; 1874} 1875 1876 1877/* Function vect_is_simple_cond. 1878 1879 Input: 1880 LOOP - the loop that is being vectorized. 1881 COND - Condition that is checked for simple use. 1882 1883 Returns whether a COND can be vectorized. Checks whether 1884 condition operands are supportable using vec_is_simple_use. */ 1885 1886static bool 1887vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) 1888{ 1889 tree lhs, rhs; 1890 tree def; 1891 enum vect_def_type dt; 1892 1893 if (!COMPARISON_CLASS_P (cond)) 1894 return false; 1895 1896 lhs = TREE_OPERAND (cond, 0); 1897 rhs = TREE_OPERAND (cond, 1); 1898 1899 if (TREE_CODE (lhs) == SSA_NAME) 1900 { 1901 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 1902 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt)) 1903 return false; 1904 } 1905 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST) 1906 return false; 1907 1908 if (TREE_CODE (rhs) == SSA_NAME) 1909 { 1910 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 1911 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt)) 1912 return false; 1913 } 1914 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST) 1915 return false; 1916 1917 return true; 1918} 1919 1920/* vectorizable_condition. 1921 1922 Check if STMT is conditional modify expression that can be vectorized. 
1923 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1924 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 1925 at BSI. 1926 1927 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1928 1929bool 1930vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1931{ 1932 tree scalar_dest = NULL_TREE; 1933 tree vec_dest = NULL_TREE; 1934 tree op = NULL_TREE; 1935 tree cond_expr, then_clause, else_clause; 1936 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1937 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1938 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause; 1939 tree vec_compare, vec_cond_expr; 1940 tree new_temp; 1941 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1942 enum machine_mode vec_mode; 1943 tree def; 1944 enum vect_def_type dt; 1945 1946 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1947 return false; 1948 1949 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1950 1951 if (STMT_VINFO_LIVE_P (stmt_info)) 1952 { 1953 /* FORNOW: not yet supported. */ 1954 if (vect_print_dump_info (REPORT_DETAILS)) 1955 fprintf (vect_dump, "value used after loop."); 1956 return false; 1957 } 1958 1959 if (TREE_CODE (stmt) != MODIFY_EXPR) 1960 return false; 1961 1962 op = TREE_OPERAND (stmt, 1); 1963 1964 if (TREE_CODE (op) != COND_EXPR) 1965 return false; 1966 1967 cond_expr = TREE_OPERAND (op, 0); 1968 then_clause = TREE_OPERAND (op, 1); 1969 else_clause = TREE_OPERAND (op, 2); 1970 1971 if (!vect_is_simple_cond (cond_expr, loop_vinfo)) 1972 return false; 1973 1974 /* We do not handle two different vector types for the condition 1975 and the values. 
*/ 1976 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype)) 1977 return false; 1978 1979 if (TREE_CODE (then_clause) == SSA_NAME) 1980 { 1981 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause); 1982 if (!vect_is_simple_use (then_clause, loop_vinfo, 1983 &then_def_stmt, &def, &dt)) 1984 return false; 1985 } 1986 else if (TREE_CODE (then_clause) != INTEGER_CST 1987 && TREE_CODE (then_clause) != REAL_CST) 1988 return false; 1989 1990 if (TREE_CODE (else_clause) == SSA_NAME) 1991 { 1992 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause); 1993 if (!vect_is_simple_use (else_clause, loop_vinfo, 1994 &else_def_stmt, &def, &dt)) 1995 return false; 1996 } 1997 else if (TREE_CODE (else_clause) != INTEGER_CST 1998 && TREE_CODE (else_clause) != REAL_CST) 1999 return false; 2000 2001 2002 vec_mode = TYPE_MODE (vectype); 2003 2004 if (!vec_stmt) 2005 { 2006 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 2007 return expand_vec_cond_expr_p (op, vec_mode); 2008 } 2009 2010 /* Transform */ 2011 2012 /* Handle def. */ 2013 scalar_dest = TREE_OPERAND (stmt, 0); 2014 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2015 2016 /* Handle cond expr. */ 2017 vec_cond_lhs = 2018 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); 2019 vec_cond_rhs = 2020 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); 2021 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); 2022 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); 2023 2024 /* Arguments are ready. create the new vector stmt. 
*/ 2025 vec_compare = build2 (TREE_CODE (cond_expr), vectype, 2026 vec_cond_lhs, vec_cond_rhs); 2027 vec_cond_expr = build (VEC_COND_EXPR, vectype, 2028 vec_compare, vec_then_clause, vec_else_clause); 2029 2030 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr); 2031 new_temp = make_ssa_name (vec_dest, *vec_stmt); 2032 TREE_OPERAND (*vec_stmt, 0) = new_temp; 2033 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 2034 2035 return true; 2036} 2037 2038/* Function vect_transform_stmt. 2039 2040 Create a vectorized stmt to replace STMT, and insert it at BSI. */ 2041 2042bool 2043vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) 2044{ 2045 bool is_store = false; 2046 tree vec_stmt = NULL_TREE; 2047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2048 bool done; 2049 2050 if (STMT_VINFO_RELEVANT_P (stmt_info)) 2051 { 2052 switch (STMT_VINFO_TYPE (stmt_info)) 2053 { 2054 case op_vec_info_type: 2055 done = vectorizable_operation (stmt, bsi, &vec_stmt); 2056 gcc_assert (done); 2057 break; 2058 2059 case assignment_vec_info_type: 2060 done = vectorizable_assignment (stmt, bsi, &vec_stmt); 2061 gcc_assert (done); 2062 break; 2063 2064 case load_vec_info_type: 2065 done = vectorizable_load (stmt, bsi, &vec_stmt); 2066 gcc_assert (done); 2067 break; 2068 2069 case store_vec_info_type: 2070 done = vectorizable_store (stmt, bsi, &vec_stmt); 2071 gcc_assert (done); 2072 is_store = true; 2073 break; 2074 2075 case condition_vec_info_type: 2076 done = vectorizable_condition (stmt, bsi, &vec_stmt); 2077 gcc_assert (done); 2078 break; 2079 2080 default: 2081 if (vect_print_dump_info (REPORT_DETAILS)) 2082 fprintf (vect_dump, "stmt not supported."); 2083 gcc_unreachable (); 2084 } 2085 2086 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 2087 } 2088 2089 if (STMT_VINFO_LIVE_P (stmt_info)) 2090 { 2091 switch (STMT_VINFO_TYPE (stmt_info)) 2092 { 2093 case reduc_vec_info_type: 2094 done = vectorizable_reduction (stmt, bsi, &vec_stmt); 2095 gcc_assert (done); 2096 
break; 2097 2098 default: 2099 done = vectorizable_live_operation (stmt, bsi, &vec_stmt); 2100 gcc_assert (done); 2101 } 2102 2103 if (vec_stmt) 2104 { 2105 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info)); 2106 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 2107 } 2108 } 2109 2110 return is_store; 2111} 2112 2113 2114/* This function builds ni_name = number of iterations loop executes 2115 on the loop preheader. */ 2116 2117static tree 2118vect_build_loop_niters (loop_vec_info loop_vinfo) 2119{ 2120 tree ni_name, stmt, var; 2121 edge pe; 2122 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2123 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); 2124 2125 var = create_tmp_var (TREE_TYPE (ni), "niters"); 2126 add_referenced_tmp_var (var); 2127 ni_name = force_gimple_operand (ni, &stmt, false, var); 2128 2129 pe = loop_preheader_edge (loop); 2130 if (stmt) 2131 { 2132 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2133 gcc_assert (!new_bb); 2134 } 2135 2136 return ni_name; 2137} 2138 2139 2140/* This function generates the following statements: 2141 2142 ni_name = number of iterations loop executes 2143 ratio = ni_name / vf 2144 ratio_mult_vf_name = ratio * vf 2145 2146 and places them at the loop preheader edge. */ 2147 2148static void 2149vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, 2150 tree *ni_name_ptr, 2151 tree *ratio_mult_vf_name_ptr, 2152 tree *ratio_name_ptr) 2153{ 2154 2155 edge pe; 2156 basic_block new_bb; 2157 tree stmt, ni_name; 2158 tree var; 2159 tree ratio_name; 2160 tree ratio_mult_vf_name; 2161 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2162 tree ni = LOOP_VINFO_NITERS (loop_vinfo); 2163 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2164 tree log_vf; 2165 2166 pe = loop_preheader_edge (loop); 2167 2168 /* Generate temporary variable that contains 2169 number of iterations loop executes. 
*/ 2170 2171 ni_name = vect_build_loop_niters (loop_vinfo); 2172 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); 2173 2174 /* Create: ratio = ni >> log2(vf) */ 2175 2176 var = create_tmp_var (TREE_TYPE (ni), "bnd"); 2177 add_referenced_tmp_var (var); 2178 ratio_name = make_ssa_name (var, NULL_TREE); 2179 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name, 2180 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf)); 2181 SSA_NAME_DEF_STMT (ratio_name) = stmt; 2182 2183 pe = loop_preheader_edge (loop); 2184 new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2185 gcc_assert (!new_bb); 2186 2187 /* Create: ratio_mult_vf = ratio << log2 (vf). */ 2188 2189 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); 2190 add_referenced_tmp_var (var); 2191 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE); 2192 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, 2193 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf)); 2194 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; 2195 2196 pe = loop_preheader_edge (loop); 2197 new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2198 gcc_assert (!new_bb); 2199 2200 *ni_name_ptr = ni_name; 2201 *ratio_mult_vf_name_ptr = ratio_mult_vf_name; 2202 *ratio_name_ptr = ratio_name; 2203 2204 return; 2205} 2206 2207 2208/* Function update_vuses_to_preheader. 2209 2210 Input: 2211 STMT - a statement with potential VUSEs. 2212 LOOP - the loop whose preheader will contain STMT. 2213 2214 It's possible to vectorize a loop even though an SSA_NAME from a VUSE 2215 appears to be defined in a V_MAY_DEF in another statement in a loop. 2216 One such case is when the VUSE is at the dereference of a __restricted__ 2217 pointer in a load and the V_MAY_DEF is at the dereference of a different 2218 __restricted__ pointer in a store. Vectorization may result in 2219 copy_virtual_uses being called to copy the problematic VUSE to a new 2220 statement that is being inserted in the loop preheader. 
This procedure 2221 is called to change the SSA_NAME in the new statement's VUSE from the 2222 SSA_NAME updated in the loop to the related SSA_NAME available on the 2223 path entering the loop. 2224 2225 When this function is called, we have the following situation: 2226 2227 # vuse <name1> 2228 S1: vload 2229 do { 2230 # name1 = phi < name0 , name2> 2231 2232 # vuse <name1> 2233 S2: vload 2234 2235 # name2 = vdef <name1> 2236 S3: vstore 2237 2238 }while... 2239 2240 Stmt S1 was created in the loop preheader block as part of misaligned-load 2241 handling. This function fixes the name of the vuse of S1 from 'name1' to 2242 'name0'. */ 2243 2244static void 2245update_vuses_to_preheader (tree stmt, struct loop *loop) 2246{ 2247 basic_block header_bb = loop->header; 2248 edge preheader_e = loop_preheader_edge (loop); 2249 ssa_op_iter iter; 2250 use_operand_p use_p; 2251 2252 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE) 2253 { 2254 tree ssa_name = USE_FROM_PTR (use_p); 2255 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name); 2256 tree name_var = SSA_NAME_VAR (ssa_name); 2257 basic_block bb = bb_for_stmt (def_stmt); 2258 2259 /* For a use before any definitions, def_stmt is a NOP_EXPR. */ 2260 if (!IS_EMPTY_STMT (def_stmt) 2261 && flow_bb_inside_loop_p (loop, bb)) 2262 { 2263 /* If the block containing the statement defining the SSA_NAME 2264 is in the loop then it's necessary to find the definition 2265 outside the loop using the PHI nodes of the header. */ 2266 tree phi; 2267 bool updated = false; 2268 2269 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi)) 2270 { 2271 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var) 2272 { 2273 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx)); 2274 updated = true; 2275 break; 2276 } 2277 } 2278 gcc_assert (updated); 2279 } 2280 } 2281} 2282 2283 2284/* Function vect_update_ivs_after_vectorizer. 
2285 2286 "Advance" the induction variables of LOOP to the value they should take 2287 after the execution of LOOP. This is currently necessary because the 2288 vectorizer does not handle induction variables that are used after the 2289 loop. Such a situation occurs when the last iterations of LOOP are 2290 peeled, because: 2291 1. We introduced new uses after LOOP for IVs that were not originally used 2292 after LOOP: the IVs of LOOP are now used by an epilog loop. 2293 2. LOOP is going to be vectorized; this means that it will iterate N/VF 2294 times, whereas the loop IVs should be bumped N times. 2295 2296 Input: 2297 - LOOP - a loop that is going to be vectorized. The last few iterations 2298 of LOOP were peeled. 2299 - NITERS - the number of iterations that LOOP executes (before it is 2300 vectorized). i.e, the number of times the ivs should be bumped. 2301 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path 2302 coming out from LOOP on which there are uses of the LOOP ivs 2303 (this is the path from LOOP->exit to epilog_loop->preheader). 2304 2305 The new definitions of the ivs are placed in LOOP->exit. 2306 The phi args associated with the edge UPDATE_E in the bb 2307 UPDATE_E->dest are updated accordingly. 2308 2309 Assumption 1: Like the rest of the vectorizer, this function assumes 2310 a single loop exit that has a single predecessor. 2311 2312 Assumption 2: The phi nodes in the LOOP header and in update_bb are 2313 organized in the same order. 2314 2315 Assumption 3: The access function of the ivs is simple enough (see 2316 vect_can_advance_ivs_p). This assumption will be relaxed in the future. 2317 2318 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path 2319 coming out of LOOP on which the ivs of LOOP are used (this is the path 2320 that leads to the epilog loop; other paths skip the epilog loop). 
This 2321 path starts with the edge UPDATE_E, and its destination (denoted update_bb) 2322 needs to have its phis updated. 2323 */ 2324 2325static void 2326vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, 2327 edge update_e) 2328{ 2329 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2330 basic_block exit_bb = loop->single_exit->dest; 2331 tree phi, phi1; 2332 basic_block update_bb = update_e->dest; 2333 2334 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */ 2335 2336 /* Make sure there exists a single-predecessor exit bb: */ 2337 gcc_assert (single_pred_p (exit_bb)); 2338 2339 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb); 2340 phi && phi1; 2341 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1)) 2342 { 2343 tree access_fn = NULL; 2344 tree evolution_part; 2345 tree init_expr; 2346 tree step_expr; 2347 tree var, stmt, ni, ni_name; 2348 block_stmt_iterator last_bsi; 2349 2350 if (vect_print_dump_info (REPORT_DETAILS)) 2351 { 2352 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: "); 2353 print_generic_expr (vect_dump, phi, TDF_SLIM); 2354 } 2355 2356 /* Skip virtual phi's. */ 2357 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) 2358 { 2359 if (vect_print_dump_info (REPORT_DETAILS)) 2360 fprintf (vect_dump, "virtual phi. skip."); 2361 continue; 2362 } 2363 2364 /* Skip reduction phis. */ 2365 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) 2366 { 2367 if (vect_print_dump_info (REPORT_DETAILS)) 2368 fprintf (vect_dump, "reduc phi. skip."); 2369 continue; 2370 } 2371 2372 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 2373 gcc_assert (access_fn); 2374 evolution_part = 2375 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num)); 2376 gcc_assert (evolution_part != NULL_TREE); 2377 2378 /* FORNOW: We do not support IVs whose evolution function is a polynomial 2379 of degree >= 2 or exponential. 
*/ 2380 gcc_assert (!tree_is_chrec (evolution_part)); 2381 2382 step_expr = evolution_part; 2383 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, 2384 loop->num)); 2385 2386 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr), 2387 build2 (MULT_EXPR, TREE_TYPE (niters), 2388 niters, step_expr), init_expr); 2389 2390 var = create_tmp_var (TREE_TYPE (init_expr), "tmp"); 2391 add_referenced_tmp_var (var); 2392 2393 ni_name = force_gimple_operand (ni, &stmt, false, var); 2394 2395 /* Insert stmt into exit_bb. */ 2396 last_bsi = bsi_last (exit_bb); 2397 if (stmt) 2398 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT); 2399 2400 /* Fix phi expressions in the successor bb. */ 2401 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name); 2402 } 2403} 2404 2405 2406/* Function vect_do_peeling_for_loop_bound 2407 2408 Peel the last iterations of the loop represented by LOOP_VINFO. 2409 The peeled iterations form a new epilog loop. Given that the loop now 2410 iterates NITERS times, the new epilog loop iterates 2411 NITERS % VECTORIZATION_FACTOR times. 2412 2413 The original loop will later be made to iterate 2414 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). 
*/ 2415 2416static void 2417vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, 2418 struct loops *loops) 2419{ 2420 tree ni_name, ratio_mult_vf_name; 2421 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2422 struct loop *new_loop; 2423 edge update_e; 2424 basic_block preheader; 2425 int loop_num; 2426 2427 if (vect_print_dump_info (REPORT_DETAILS)) 2428 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); 2429 2430 initialize_original_copy_tables (); 2431 2432 /* Generate the following variables on the preheader of original loop: 2433 2434 ni_name = number of iteration the original loop executes 2435 ratio = ni_name / vf 2436 ratio_mult_vf_name = ratio * vf */ 2437 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, 2438 &ratio_mult_vf_name, ratio); 2439 2440 loop_num = loop->num; 2441 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit, 2442 ratio_mult_vf_name, ni_name, false); 2443 gcc_assert (new_loop); 2444 gcc_assert (loop_num == loop->num); 2445#ifdef ENABLE_CHECKING 2446 slpeel_verify_cfg_after_peeling (loop, new_loop); 2447#endif 2448 2449 /* A guard that controls whether the new_loop is to be executed or skipped 2450 is placed in LOOP->exit. LOOP->exit therefore has two successors - one 2451 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other 2452 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that 2453 is on the path where the LOOP IVs are used and need to be updated. */ 2454 2455 preheader = loop_preheader_edge (new_loop)->src; 2456 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest) 2457 update_e = EDGE_PRED (preheader, 0); 2458 else 2459 update_e = EDGE_PRED (preheader, 1); 2460 2461 /* Update IVs of original loop as if they were advanced 2462 by ratio_mult_vf_name steps. */ 2463 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); 2464 2465 /* After peeling we have to reset scalar evolution analyzer. 
*/ 2466 scev_reset (); 2467 2468 free_original_copy_tables (); 2469} 2470 2471 2472/* Function vect_gen_niters_for_prolog_loop 2473 2474 Set the number of iterations for the loop represented by LOOP_VINFO 2475 to the minimum between LOOP_NITERS (the original iteration count of the loop) 2476 and the misalignment of DR - the data reference recorded in 2477 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of 2478 this loop, the data reference DR will refer to an aligned location. 2479 2480 The following computation is generated: 2481 2482 If the misalignment of DR is known at compile time: 2483 addr_mis = int mis = DR_MISALIGNMENT (dr); 2484 Else, compute address misalignment in bytes: 2485 addr_mis = addr & (vectype_size - 1) 2486 2487 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) ) 2488 2489 (elem_size = element type size; an element is the scalar element 2490 whose type is the inner type of the vectype) */ 2491 2492static tree 2493vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) 2494{ 2495 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); 2496 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2497 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2498 tree var, stmt; 2499 tree iters, iters_name; 2500 edge pe; 2501 basic_block new_bb; 2502 tree dr_stmt = DR_STMT (dr); 2503 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); 2504 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2505 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; 2506 tree niters_type = TREE_TYPE (loop_niters); 2507 2508 pe = loop_preheader_edge (loop); 2509 2510 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) 2511 { 2512 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); 2513 int element_size = vectype_align/vf; 2514 int elem_misalign = byte_misalign / element_size; 2515 2516 if (vect_print_dump_info (REPORT_DETAILS)) 2517 fprintf (vect_dump, "known alignment = %d.", byte_misalign); 2518 iters = 
build_int_cst (niters_type, (vf - elem_misalign)&(vf-1)); 2519 } 2520 else 2521 { 2522 tree new_stmts = NULL_TREE; 2523 tree start_addr = 2524 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE); 2525 tree ptr_type = TREE_TYPE (start_addr); 2526 tree size = TYPE_SIZE (ptr_type); 2527 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); 2528 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); 2529 tree elem_size_log = 2530 build_int_cst (type, exact_log2 (vectype_align/vf)); 2531 tree vf_minus_1 = build_int_cst (type, vf - 1); 2532 tree vf_tree = build_int_cst (type, vf); 2533 tree byte_misalign; 2534 tree elem_misalign; 2535 2536 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); 2537 gcc_assert (!new_bb); 2538 2539 /* Create: byte_misalign = addr & (vectype_size - 1) */ 2540 byte_misalign = 2541 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); 2542 2543 /* Create: elem_misalign = byte_misalign / element_size */ 2544 elem_misalign = 2545 build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); 2546 2547 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ 2548 iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign); 2549 iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1); 2550 iters = fold_convert (niters_type, iters); 2551 } 2552 2553 /* Create: prolog_loop_niters = min (iters, loop_niters) */ 2554 /* If the loop bound is known at compile time we already verified that it is 2555 greater than vf; since the misalignment ('iters') is at most vf, there's 2556 no need to generate the MIN_EXPR in this case. 
*/ 2557 if (TREE_CODE (loop_niters) != INTEGER_CST) 2558 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters); 2559 2560 if (vect_print_dump_info (REPORT_DETAILS)) 2561 { 2562 fprintf (vect_dump, "niters for prolog loop: "); 2563 print_generic_expr (vect_dump, iters, TDF_SLIM); 2564 } 2565 2566 var = create_tmp_var (niters_type, "prolog_loop_niters"); 2567 add_referenced_tmp_var (var); 2568 iters_name = force_gimple_operand (iters, &stmt, false, var); 2569 2570 /* Insert stmt on loop preheader edge. */ 2571 if (stmt) 2572 { 2573 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2574 gcc_assert (!new_bb); 2575 } 2576 2577 return iters_name; 2578} 2579 2580 2581/* Function vect_update_init_of_dr 2582 2583 NITERS iterations were peeled from LOOP. DR represents a data reference 2584 in LOOP. This function updates the information recorded in DR to 2585 account for the fact that the first NITERS iterations had already been 2586 executed. Specifically, it updates the OFFSET field of DR. */ 2587 2588static void 2589vect_update_init_of_dr (struct data_reference *dr, tree niters) 2590{ 2591 tree offset = DR_OFFSET (dr); 2592 2593 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr)); 2594 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters); 2595 DR_OFFSET (dr) = offset; 2596} 2597 2598 2599/* Function vect_update_inits_of_drs 2600 2601 NITERS iterations were peeled from the loop represented by LOOP_VINFO. 2602 This function updates the information recorded for the data references in 2603 the loop to account for the fact that the first NITERS iterations had 2604 already been executed. Specifically, it updates the initial_condition of the 2605 access_function of all the data_references in the loop. 
*/ 2606 2607static void 2608vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) 2609{ 2610 unsigned int i; 2611 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 2612 2613 if (vect_dump && (dump_flags & TDF_DETAILS)) 2614 fprintf (vect_dump, "=== vect_update_inits_of_dr ==="); 2615 2616 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) 2617 { 2618 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); 2619 vect_update_init_of_dr (dr, niters); 2620 } 2621} 2622 2623 2624/* Function vect_do_peeling_for_alignment 2625 2626 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. 2627 'niters' is set to the misalignment of one of the data references in the 2628 loop, thereby forcing it to refer to an aligned location at the beginning 2629 of the execution of this loop. The data reference for which we are 2630 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ 2631 2632static void 2633vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) 2634{ 2635 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2636 tree niters_of_prolog_loop, ni_name; 2637 tree n_iters; 2638 struct loop *new_loop; 2639 2640 if (vect_print_dump_info (REPORT_DETAILS)) 2641 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ==="); 2642 2643 initialize_original_copy_tables (); 2644 2645 ni_name = vect_build_loop_niters (loop_vinfo); 2646 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); 2647 2648 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ 2649 new_loop = 2650 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 2651 niters_of_prolog_loop, ni_name, true); 2652 gcc_assert (new_loop); 2653#ifdef ENABLE_CHECKING 2654 slpeel_verify_cfg_after_peeling (new_loop, loop); 2655#endif 2656 2657 /* Update number of times loop executes. 
*/ 2658 n_iters = LOOP_VINFO_NITERS (loop_vinfo); 2659 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, 2660 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); 2661 2662 /* Update the init conditions of the access functions of all data refs. */ 2663 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); 2664 2665 /* After peeling we have to reset scalar evolution analyzer. */ 2666 scev_reset (); 2667 2668 free_original_copy_tables (); 2669} 2670 2671 2672/* Function vect_create_cond_for_align_checks. 2673 2674 Create a conditional expression that represents the alignment checks for 2675 all of data references (array element references) whose alignment must be 2676 checked at runtime. 2677 2678 Input: 2679 LOOP_VINFO - two fields of the loop information are used. 2680 LOOP_VINFO_PTR_MASK is the mask used to check the alignment. 2681 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked. 2682 2683 Output: 2684 COND_EXPR_STMT_LIST - statements needed to construct the conditional 2685 expression. 2686 The returned value is the conditional expression to be used in the if 2687 statement that controls which version of the loop gets executed at runtime. 2688 2689 The algorithm makes two assumptions: 2690 1) The number of bytes "n" in a vector is a power of 2. 2691 2) An address "a" is aligned if a%n is zero and that this 2692 test can be done as a&(n-1) == 0. For example, for 16 2693 byte vectors the test is a&0xf == 0. 
*/

static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  /* Large enough for the longest prefix used below ("addr2int") followed
     by the decimal digits of a 32-bit unsigned index and the NUL.  */
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  The only caller in
     this file, vect_transform_loop, invokes this function only when the
     list of possibly misaligned stmts is non-empty, so OR_TMP_NAME is set
     by the loop below.  */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
                                                        &new_stmt_list,
                                                        NULL_TREE);

      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      /* I is unsigned, so it is formatted with %u.  */
      sprintf (tmp_name, "%s%u", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_tmp_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
                          addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%u", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_tmp_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
                                    or_tmp_name,
                                    addr_tmp_name));
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
        }
      else
        /* First address: nothing to OR with yet.  */
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
  add_referenced_tmp_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (MODIFY_EXPR, void_type_node,
                     and_tmp_name,
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
                             or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a non-zero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
}


/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.
*/

void
vect_transform_loop (loop_vec_info loop_vinfo,
                     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree ratio = NULL;
  block_stmt_iterator si;
  bitmap_iterator bi;
  unsigned int j;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned.  */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree align_cond;
      tree align_cond_stmts = NULL_TREE;
      basic_block condition_bb;
      basic_block merge_bb, new_exit_bb;
      block_stmt_iterator cond_bsi;
      edge new_exit_e, merge_e;
      tree orig_phi;

      align_cond = vect_create_cond_for_align_checks (loop_vinfo,
                                                      &align_cond_stmts);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, align_cond, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
         vectorization - that the loop exit block has a single predecessor.
         After versioning, the exit block of both loop versions is the same
         basic block (i.e. it has two predecessors).  Just in order to
         simplify following transformations in the vectorizer, we fix this
         situation here by adding a new (empty) block on the exit-edge of
         the loop, with the proper loop-exit phis to maintain
         loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      merge_e = EDGE_SUCC (new_exit_bb, 0);

      /* Route every phi of the merge block through a new single-argument
         phi in the new exit block.  */
      orig_phi = phi_nodes (merge_bb);
      while (orig_phi)
        {
          tree new_phi
            = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                               new_exit_bb);
          tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, merge_e);

          add_phi_arg (new_phi, arg, new_exit_e);
          SET_PHI_ARG_DEF (orig_phi, merge_e->dest_idx, PHI_RESULT (new_phi));
          orig_phi = PHI_CHAIN (orig_phi);
        }

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);

      /* Emit the statements that compute the alignment condition just
         before the last statement of the condition block.  */
      cond_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_bsi, align_cond_stmts, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if peeling for alignment was requested for it.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the
     loop will remain scalar and will compute the remaining (n%VF)
     iterations.  (VF is the vectorization factor).  */

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor == 0)
    /* Known count that is a multiple of VF: no epilog is needed.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
  else
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch).  When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      si = bsi_start (bbs[i]);
      while (!bsi_end_p (si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);
          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: Verify that all stmts operate on the same number
                 of units and no inner unrolling is necessary.  */
              gcc_assert
                (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                 == (unsigned HOST_WIDE_INT) vectorization_factor);

              /* -------- vectorize statement ------------ */
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform statement.");

              if (vect_transform_stmt (stmt, &si))
                {
                  /* STMT was a store: free the attached stmt_vec_info and
                     remove the stmt.  The iterator is not advanced after
                     the removal.  */
                  stmt_ann_t ann = stmt_ann (stmt);
                  free (stmt_info);
                  set_stmt_info ((tree_ann_t)ann, NULL);
                  bsi_remove (&si);
                  continue;
                }
            }

          bsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}