/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"

/* Extract the location of the basic block in the source code.
   Walk the stmts of BB in order and return the location of the first stmt
   that has one.  Return UNKNOWN_LOC if BB is NULL or no stmt in BB carries
   a location.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
*/ 65 66static void 67vect_free_slp_tree (slp_tree node) 68{ 69 if (!node) 70 return; 71 72 if (SLP_TREE_LEFT (node)) 73 vect_free_slp_tree (SLP_TREE_LEFT (node)); 74 75 if (SLP_TREE_RIGHT (node)) 76 vect_free_slp_tree (SLP_TREE_RIGHT (node)); 77 78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); 79 80 if (SLP_TREE_VEC_STMTS (node)) 81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node)); 82 83 free (node); 84} 85 86 87/* Free the memory allocated for the SLP instance. */ 88 89void 90vect_free_slp_instance (slp_instance instance) 91{ 92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); 93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance)); 94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance)); 95} 96 97 98/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that 99 they are of a legal type and that they match the defs of the first stmt of 100 the SLP group (stored in FIRST_STMT_...). */ 101 102static bool 103vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, 104 slp_tree slp_node, gimple stmt, 105 VEC (gimple, heap) **def_stmts0, 106 VEC (gimple, heap) **def_stmts1, 107 enum vect_def_type *first_stmt_dt0, 108 enum vect_def_type *first_stmt_dt1, 109 tree *first_stmt_def0_type, 110 tree *first_stmt_def1_type, 111 tree *first_stmt_const_oprnd, 112 int ncopies_for_cost, 113 bool *pattern0, bool *pattern1) 114{ 115 tree oprnd; 116 unsigned int i, number_of_oprnds; 117 tree def; 118 gimple def_stmt; 119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 120 stmt_vec_info stmt_info = 121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); 122 enum gimple_rhs_class rhs_class; 123 struct loop *loop = NULL; 124 125 if (loop_vinfo) 126 loop = LOOP_VINFO_LOOP (loop_vinfo); 127 128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); 129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ 130 131 for (i = 0; i < number_of_oprnds; i++) 132 { 
      /* Operand I of the RHS (operand 0 of the gimple stmt is the LHS).  */
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern. Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  /* Mixing pattern and non-pattern stmts in one node is not
                     supported.  */
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          /* Switch to the pattern stmt that replaces DEF_STMT.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not memory operation (we don't call this functions for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }

      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand. We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }
                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
          /* Collect the def stmt so the caller can recurse into it.  */
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct
  data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || (optab->handlers[(int) vec_mode].insn_code
                      == CODE_FOR_nothing))
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }
                  /* Vector/scalar shift: all stmts in the group must use the
                     same (scalar) shift amount.  */
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          /* All stmts in the group must perform the same operation; the only
             tolerated mismatch is REALPART_EXPR vs. IMAGPART_EXPR.  */
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
                  > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain. In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Not strided loads are not supported.  */
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}


/* Dump the SLP tree rooted at NODE (stmts of NODE, then its left and right
   subtrees) to the dump file.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree
 (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      /* A stmt reached here is either not yet marked relevant, or was
         already marked as used in scope.  */
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      /* INDEX is the position of this node's loads in the interleaving
         chain, so placing nodes at TMP_LOADS[INDEX] sorts them.  */
      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k;
  bool supported;
  sbitmap load_index;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
        fprintf (vect_dump, "%d ", next);
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well.
*/ 817 if (VEC_length (int, load_permutation) 818 != (unsigned int) (group_size * group_size)) 819 return false; 820 821 supported = true; 822 load_index = sbitmap_alloc (group_size); 823 sbitmap_zero (load_index); 824 for (j = 0; j < group_size; j++) 825 { 826 for (i = j * group_size, k = 0; 827 VEC_iterate (int, load_permutation, i, next) && k < group_size; 828 i++, k++) 829 { 830 if (i != j * group_size && next != prev) 831 { 832 supported = false; 833 break; 834 } 835 836 prev = next; 837 } 838 839 if (TEST_BIT (load_index, prev)) 840 { 841 supported = false; 842 break; 843 } 844 845 SET_BIT (load_index, prev); 846 } 847 848 for (j = 0; j < group_size; j++) 849 if (!TEST_BIT (load_index, j)) 850 return false; 851 852 sbitmap_free (load_index); 853 854 if (supported && i == group_size * group_size 855 && vect_supported_slp_permutation_p (slp_instn)) 856 return true; 857 858 return false; 859} 860 861 862/* Find the first load in the loop that belongs to INSTANCE. 863 When loads are in several SLP nodes, there can be a case in which the first 864 load does not appear in the first SLP node to be transformed, causing 865 incorrect order of statements. Since we generate all the loads together, 866 they must be inserted before the first load of the SLP instance and not 867 before the first load of the first node of the instance. */ 868static gimple 869vect_find_first_load_in_slp_instance (slp_instance instance) 870{ 871 int i, j; 872 slp_tree load_node; 873 gimple first_load = NULL, load; 874 875 for (i = 0; 876 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node); 877 i++) 878 for (j = 0; 879 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load); 880 j++) 881 first_load = get_earlier_stmt (load, first_load); 882 883 return first_load; 884} 885 886 887/* Analyze an SLP instance starting from a group of strided stores. Call 888 vect_build_slp_tree to build a tree of packed stmts if possible. 
889 Return FALSE if it's impossible to SLP any stmt in the loop. */ 890 891static bool 892vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, 893 gimple stmt) 894{ 895 slp_instance new_instance; 896 slp_tree node = XNEW (struct _slp_tree); 897 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); 898 unsigned int unrolling_factor = 1, nunits; 899 tree vectype, scalar_type; 900 gimple next; 901 unsigned int vectorization_factor = 0; 902 int inside_cost = 0, outside_cost = 0, ncopies_for_cost; 903 unsigned int max_nunits = 0; 904 VEC (int, heap) *load_permutation; 905 VEC (slp_tree, heap) *loads; 906 907 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF ( 908 vinfo_for_stmt (stmt)))); 909 vectype = get_vectype_for_scalar_type (scalar_type); 910 if (!vectype) 911 { 912 if (vect_print_dump_info (REPORT_SLP)) 913 { 914 fprintf (vect_dump, "Build SLP failed: unsupported data-type "); 915 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 916 } 917 return false; 918 } 919 920 nunits = TYPE_VECTOR_SUBPARTS (vectype); 921 if (loop_vinfo) 922 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 923 else 924 /* No multitypes in BB SLP. */ 925 vectorization_factor = nunits; 926 927 /* Calculate the unrolling factor. */ 928 unrolling_factor = least_common_multiple (nunits, group_size) / group_size; 929 if (unrolling_factor != 1 && !loop_vinfo) 930 { 931 if (vect_print_dump_info (REPORT_SLP)) 932 fprintf (vect_dump, "Build SLP failed: unrolling required in basic" 933 " block SLP"); 934 935 return false; 936 } 937 938 /* Create a node (a root of the SLP tree) for the packed strided stores. */ 939 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); 940 next = stmt; 941 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. 
*/ 942 while (next) 943 { 944 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); 945 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); 946 } 947 948 SLP_TREE_VEC_STMTS (node) = NULL; 949 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; 950 SLP_TREE_LEFT (node) = NULL; 951 SLP_TREE_RIGHT (node) = NULL; 952 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; 953 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; 954 955 /* Calculate the number of vector stmts to create based on the unrolling 956 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is 957 GROUP_SIZE / NUNITS otherwise. */ 958 ncopies_for_cost = unrolling_factor * group_size / nunits; 959 960 load_permutation = VEC_alloc (int, heap, group_size * group_size); 961 loads = VEC_alloc (slp_tree, heap, group_size); 962 963 /* Build the tree for the SLP instance. */ 964 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, 965 &inside_cost, &outside_cost, ncopies_for_cost, 966 &max_nunits, &load_permutation, &loads, 967 vectorization_factor)) 968 { 969 /* Create a new SLP instance. */ 970 new_instance = XNEW (struct _slp_instance); 971 SLP_INSTANCE_TREE (new_instance) = node; 972 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; 973 /* Calculate the unrolling factor based on the smallest type in the 974 loop. 
*/ 975 if (max_nunits > nunits) 976 unrolling_factor = least_common_multiple (max_nunits, group_size) 977 / group_size; 978 979 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; 980 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; 981 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; 982 SLP_INSTANCE_LOADS (new_instance) = loads; 983 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; 984 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; 985 if (VEC_length (slp_tree, loads)) 986 { 987 if (!vect_supported_load_permutation_p (new_instance, group_size, 988 load_permutation)) 989 { 990 if (vect_print_dump_info (REPORT_SLP)) 991 { 992 fprintf (vect_dump, "Build SLP failed: unsupported load " 993 "permutation "); 994 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 995 } 996 997 vect_free_slp_instance (new_instance); 998 return false; 999 } 1000 1001 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) 1002 = vect_find_first_load_in_slp_instance (new_instance); 1003 } 1004 else 1005 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance)); 1006 1007 if (loop_vinfo) 1008 VEC_safe_push (slp_instance, heap, 1009 LOOP_VINFO_SLP_INSTANCES (loop_vinfo), 1010 new_instance); 1011 else 1012 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo), 1013 new_instance); 1014 1015 if (vect_print_dump_info (REPORT_SLP)) 1016 vect_print_slp_tree (node); 1017 1018 return true; 1019 } 1020 1021 /* Failed to SLP. */ 1022 /* Free the allocated memory. */ 1023 vect_free_slp_tree (node); 1024 VEC_free (int, heap, load_permutation); 1025 VEC_free (slp_tree, heap, loads); 1026 1027 return false; 1028} 1029 1030 1031/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP 1032 trees of packed scalar stmts if SLP is possible. 
 */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Try to build an SLP instance for every group of strided stores.  */
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  /* In basic block vectorization, fail if no instance succeeded; in loop
     vectorization, SLP is optional so always return true.  */
  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization. Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  /* The loop unrolling factor is the maximum over all instances.  */
  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  /* A pure-SLP stmt whose SSA def is used by a non-SLP relevant stmt must
     also be vectorized by the loop vectorizer - mark it HYBRID.  */
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}


/* Find stmts that must be both vectorized and SLPed.
*/

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  /* Stash the info on the block itself so vec_info_for_bb () can find it.  */
  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  /* Post-order: operands must be analyzable before the node itself.  */
  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  Unsupported instances are removed (and freed)
   from the instance vector rather than failing the whole block.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  /* Note: I is only advanced when the instance is kept, because
     VEC_ordered_remove shifts the remaining elements down.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  /* Fail only if no instance survived.  */
  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}


/* Check if the basic block can be vectorized.
*/

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count the "real" statements; debug stmts, nops and labels are not
     counted against the size limit.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  /* Bail out early on very large blocks to bound compile time.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Each analysis phase below mirrors the loop-vectorizer pipeline; the
     first argument is NULL since there is no enclosing loop_vinfo.  On any
     failure the bb_vinfo (and all stmt_vec_infos) must be destroyed.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
                            " block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}


/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor). However,
   the actual number of vector stmts for every SLP node depends on VF which is
   set later in vect_analyze_operations().  Hence, SLP costs should be updated.
   In this function we assume that the inside costs calculated in
   vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  Note: VF is always a
       multiple of the instance unrolling factor, so this division is
       exact.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* For a store the invariant/constant operand is always the RHS.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  /* CONSTANT_P selects build_vector vs. build_constructor below.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Iterate backwards; since I is unsigned, the loop terminates when
         I wraps past zero and VEC_iterate fails.  The reversal is undone
         further below.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}


/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.
*/

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  /* Collect the LHS of every already-vectorized stmt in SLP_NODE.  */
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
       i++)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node.  This is used when the second operand must remain scalar.  */

void
vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
                   VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.  */
  if (SLP_TREE_LEFT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* Only binary operations have a second operand to fetch.  */
  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects);
}


/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;
  VEC (tree, heap) *params = NULL;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Build argument list for the vectorized call.  */
      VEC_free (tree, heap, params);
      params = VEC_alloc (tree, heap, 3);
      VEC_quick_push (tree, params, first_vec);
      VEC_quick_push (tree, params, second_vec);
      VEC_quick_push (tree, params, mask);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}


/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
1787 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid 1788 permute statements for SLP_NODE_INSTANCE. */ 1789bool 1790vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, 1791 gimple_stmt_iterator *gsi, int vf, 1792 slp_instance slp_node_instance, bool analyze_only) 1793{ 1794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1795 tree mask_element_type = NULL_TREE, mask_type; 1796 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index; 1797 slp_tree node; 1798 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl; 1799 gimple next_scalar_stmt; 1800 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); 1801 int first_mask_element; 1802 int index, unroll_factor, *mask, current_mask_element, ncopies; 1803 bool only_one_vec = false, need_next_vector = false; 1804 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter; 1805 int number_of_mask_fixes = 1; 1806 bool mask_fixed = false; 1807 bool needs_first_vector = false; 1808 1809 if (!targetm.vectorize.builtin_vec_perm) 1810 { 1811 if (vect_print_dump_info (REPORT_DETAILS)) 1812 { 1813 fprintf (vect_dump, "no builtin for vect permute for "); 1814 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 1815 } 1816 1817 return false; 1818 } 1819 1820 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype, 1821 &mask_element_type); 1822 if (!builtin_decl || !mask_element_type) 1823 { 1824 if (vect_print_dump_info (REPORT_DETAILS)) 1825 { 1826 fprintf (vect_dump, "no builtin for vect permute for "); 1827 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 1828 } 1829 1830 return false; 1831 } 1832 1833 mask_type = get_vectype_for_scalar_type (mask_element_type); 1834 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type); 1835 mask = (int *) xmalloc (sizeof (int) * mask_nunits); 1836 nunits = TYPE_VECTOR_SUBPARTS (vectype); 1837 scale = mask_nunits / nunits; 1838 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); 1839 1840 /* 
The number of vector stmts to generate based only on SLP_NODE_INSTANCE 1841 unrolling factor. */ 1842 orig_vec_stmts_num = group_size * 1843 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits; 1844 if (orig_vec_stmts_num == 1) 1845 only_one_vec = true; 1846 1847 /* Number of copies is determined by the final vectorization factor 1848 relatively to SLP_NODE_INSTANCE unrolling factor. */ 1849 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); 1850 1851 /* Generate permutation masks for every NODE. Number of masks for each NODE 1852 is equal to GROUP_SIZE. 1853 E.g., we have a group of three nodes with three loads from the same 1854 location in each node, and the vector size is 4. I.e., we have a 1855 a0b0c0a1b1c1... sequence and we need to create the following vectors: 1856 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3 1857 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3 1858 ... 1859 1860 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target 1861 scpecific type, e.g., in bytes for Altivec. 1862 The last mask is illegal since we assume two operands for permute 1863 operation, and the mask element values can't be outside that range. Hence, 1864 the last mask must be converted into {2,5,5,5}. 1865 For the first two permutations we need the first and the second input 1866 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation 1867 we need the second and the third vectors: {b1,c1,a2,b2} and 1868 {c2,a3,b3,c3}. 
*/ 1869 1870 for (i = 0; 1871 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), 1872 i, node); 1873 i++) 1874 { 1875 scalar_index = 0; 1876 index = 0; 1877 vect_stmts_counter = 0; 1878 vec_index = 0; 1879 first_vec_index = vec_index++; 1880 if (only_one_vec) 1881 second_vec_index = first_vec_index; 1882 else 1883 second_vec_index = vec_index++; 1884 1885 for (j = 0; j < unroll_factor; j++) 1886 { 1887 for (k = 0; k < group_size; k++) 1888 { 1889 first_mask_element = (i + j * group_size) * scale; 1890 for (m = 0; m < scale; m++) 1891 { 1892 if (!vect_get_mask_element (stmt, first_mask_element, m, 1893 mask_nunits, only_one_vec, index, mask, 1894 ¤t_mask_element, &need_next_vector, 1895 &number_of_mask_fixes, &mask_fixed, 1896 &needs_first_vector)) 1897 return false; 1898 1899 mask[index++] = current_mask_element; 1900 } 1901 1902 if (index == mask_nunits) 1903 { 1904 tree mask_vec = NULL; 1905 1906 while (--index >= 0) 1907 { 1908 tree t = build_int_cst (mask_element_type, mask[index]); 1909 mask_vec = tree_cons (NULL, t, mask_vec); 1910 } 1911 mask_vec = build_vector (mask_type, mask_vec); 1912 index = 0; 1913 1914 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, 1915 mask_vec)) 1916 { 1917 if (vect_print_dump_info (REPORT_DETAILS)) 1918 { 1919 fprintf (vect_dump, "unsupported vect permute "); 1920 print_generic_expr (vect_dump, mask_vec, 0); 1921 } 1922 free (mask); 1923 return false; 1924 } 1925 1926 if (!analyze_only) 1927 { 1928 if (need_next_vector) 1929 { 1930 first_vec_index = second_vec_index; 1931 second_vec_index = vec_index; 1932 } 1933 1934 next_scalar_stmt = VEC_index (gimple, 1935 SLP_TREE_SCALAR_STMTS (node), scalar_index++); 1936 1937 vect_create_mask_and_perm (stmt, next_scalar_stmt, 1938 mask_vec, first_vec_index, second_vec_index, 1939 gsi, node, builtin_decl, vectype, dr_chain, 1940 ncopies, vect_stmts_counter++); 1941 } 1942 } 1943 } 1944 } 1945 } 1946 1947 free (mask); 1948 return true; 1949} 1950 1951 1952 1953/* Vectorize 
   SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  /* Postorder: vectorize the operands first so their vector defs exist
     when this node is transformed.  */
  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  if (is_store)
    {
      if (DR_GROUP_FIRST_DR (stmt_info))
        /* If IS_STORE is TRUE, the vectorization of the
           interleaving chain was completed - free all the stores in
           the chain.  */
        vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
      else
        /* FORNOW: SLP originates only from strided stores.  */
        gcc_unreachable ();

      return true;
    }

  /* FORNOW: SLP originates only from strided stores.  */
  return false;
}


/* Vectorize all the SLP instances recorded in LOOP_VINFO or BB_VINFO.
   Return TRUE if at least one store was vectorized.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      /* Basic-block SLP never unrolls.  */
      vf = 1;
    }

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}