/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "builtins.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
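/* For example, when vectorizing the outer loop of the (illustrative)
   nest below, stmt_in_inner_loop_p holds for S1 but not for S2:

     for (i = 0; i < N; i++)        <-- loop being vectorized
       {
         for (j = 0; j < M; j++)    <-- loop->inner
           s += a[i][j];            <-- S1
         b[i] = s;                  <-- S2
       }

   Inner-loop statements need different handling, e.g. when costing.  */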
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
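/* A sketch of how the helpers above cooperate when vectorizing with
   load/store-lanes (names are illustrative):

     vect_array = *ptr;         <-- MEM_REF built by create_array_ref
     vx.0 = vect_array[0];      <-- read_vector_array
     vx.1 = vect_array[1];      <-- read_vector_array
     ...
     vect_array[0] = vz.0;      <-- write_vector_array

   with vect_array itself obtained from create_vector_array.  */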
/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This is a use outside the pattern.  If LHS also has uses that
             are pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
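/* For example, vect_stmt_relevant_p (below) classifies the stmts of the
   illustrative loop

     for (i = 0; i < N; i++)
       {
         a[i] = b[i] + 1;    <-- relevant: it has a vdef (stores to memory)
         t = c[i] * 2;
       }
     ... = t;                <-- the def of t is "live": used after the loop

   Both statements then seed the worklist in
   vect_mark_stmts_to_be_vectorized.  */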
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
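/* For example, in the statement a[i] = x, the use of x is a non-indexing
   use (x is the stored value), so the function above returns true for it;
   the use of i only serves to compute the address of a[i], so the function
   returns false for it and the stmts computing i need not be vectorized
   themselves.  */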
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
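/* For example, when the relevant statement S: x_1 = a_2 + b_3 is popped
   from the worklist, process_use is called for a_2 and for b_3; each call
   marks the corresponding def-stmt at least as relevant as S and pushes it
   onto the worklist, so relevance propagates backwards along the use-def
   chains (with the inner/outer-loop adjustments above).  */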
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
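/* For example, for the reduction

     for (i = 0; i < N; i++)
       sum += a[i];

   the uses feeding the reduction stmt (here the load of a[i]) are marked
   vect_used_by_reduction with live_p = false: only the final value of sum
   is needed, so the order of the intermediate results produced by such
   stmts does not have to be preserved.  */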
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
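/* For example, for a group of 4 interleaved stores vect_cost_group_size
   returns 4 for the first store of the group and 1 for the other three, so
   the interleaving overhead in the cost functions below is charged exactly
   once per group.  */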
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses high and low interleave operations, or shuffle operations,
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations, or shuffle operations,
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
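/* For example, a permuted load group of size 4 with ncopies = 2 is charged
   2 * ceil_log2 (4) * 4 = 16 vec_perm stmts on top of the loads themselves
   (an illustrative instance of the formula above).  */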
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */
        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
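/* For example, calling vect_init_vector (below) with VAL = 5, a V4SI TYPE
   and a NULL GSI emits (a sketch; SSA names are illustrative)

     vect_cst_.7 = { 5, 5, 5, 5 };

   on the loop preheader edge and returns the new SSA name vect_cst_.7.  */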
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   a vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g.
   vectorizing S2 in the illustration below, in which VF=16 and nunits=4,
   so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */
void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
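/* For example, if the stmt at *GSI is a store carrying .MEM_5 = VDEF
   <.MEM_4> and we insert a vectorized store before it, the virtual
   operands become (a sketch; SSA names are illustrative):

     # .MEM_7 = VDEF <.MEM_4>
     MEM[...] = vect_x.9;       <-- VEC_STMT, given the new vdef .MEM_7
     # .MEM_5 = VDEF <.MEM_7>
     ... original store ...

   i.e. the new stmt is spliced into the virtual use-def chain directly,
   avoiding a run of the SSA renamer.  */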
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);


/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW.  This restriction should be relaxed.  */
*/ 1809 if (nested_in_vect_loop && ncopies > 1) 1810 { 1811 if (dump_enabled_p ()) 1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1813 "multiple types in nested loop."); 1814 return false; 1815 } 1816 1817 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1818 return false; 1819 1820 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 1821 return false; 1822 1823 if (!STMT_VINFO_DATA_REF (stmt_info)) 1824 return false; 1825 1826 elem_type = TREE_TYPE (vectype); 1827 1828 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 1829 return false; 1830 1831 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) 1832 return false; 1833 1834 if (STMT_VINFO_GATHER_P (stmt_info)) 1835 { 1836 gimple def_stmt; 1837 tree def; 1838 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base, 1839 &gather_off, &gather_scale); 1840 gcc_assert (gather_decl); 1841 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL, 1842 &def_stmt, &def, &gather_dt, 1843 &gather_off_vectype)) 1844 { 1845 if (dump_enabled_p ()) 1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1847 "gather index use not simple."); 1848 return false; 1849 } 1850 1851 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); 1852 tree masktype 1853 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); 1854 if (TREE_CODE (masktype) == INTEGER_TYPE) 1855 { 1856 if (dump_enabled_p ()) 1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1858 "masked gather with integer mask not supported."); 1859 return false; 1860 } 1861 } 1862 else if (tree_int_cst_compare (nested_in_vect_loop 1863 ? STMT_VINFO_DR_STEP (stmt_info) 1864 : DR_STEP (dr), size_zero_node) <= 0) 1865 return false; 1866 else if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1867 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store)) 1868 return false; 1869 1870 if (TREE_CODE (mask) != SSA_NAME) 1871 return false; 1872 1873 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL, 1874 &def_stmt, &def, &dt)) 1875 return false; 1876 1877 if (is_store) 1878 { 1879 tree rhs = gimple_call_arg (stmt, 3); 1880 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL, 1881 &def_stmt, &def, &dt)) 1882 return false; 1883 } 1884 1885 if (!vec_stmt) /* transformation not required. */ 1886 { 1887 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 1888 if (is_store) 1889 vect_model_store_cost (stmt_info, ncopies, false, dt, 1890 NULL, NULL, NULL); 1891 else 1892 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL); 1893 return true; 1894 } 1895 1896 /** Transform. 
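     As an illustration of the gather path below (hypothetical vector
     sizes): with nunits == 4 and gather_off_nunits == 8 the WIDEN case
     builds the permutation selector { 4, 5, 6, 7, 4, 5, 6, 7 }, which
     extracts the high half of the offset vector for the odd-numbered
     copies, while the NARROW case (nunits == 8, gather_off_nunits == 4)
     doubles ncopies and concatenates the low halves of two gather
     results with the selector { 0, 1, 2, 3, 8, 9, 10, 11 }.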
**/ 1897 1898 if (STMT_VINFO_GATHER_P (stmt_info)) 1899 { 1900 tree vec_oprnd0 = NULL_TREE, op; 1901 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); 1902 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 1903 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale; 1904 tree perm_mask = NULL_TREE, prev_res = NULL_TREE; 1905 tree mask_perm_mask = NULL_TREE; 1906 edge pe = loop_preheader_edge (loop); 1907 gimple_seq seq; 1908 basic_block new_bb; 1909 enum { NARROW, NONE, WIDEN } modifier; 1910 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype); 1911 1912 rettype = TREE_TYPE (TREE_TYPE (gather_decl)); 1913 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 1914 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 1915 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 1916 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 1917 scaletype = TREE_VALUE (arglist); 1918 gcc_checking_assert (types_compatible_p (srctype, rettype) 1919 && types_compatible_p (srctype, masktype)); 1920 1921 if (nunits == gather_off_nunits) 1922 modifier = NONE; 1923 else if (nunits == gather_off_nunits / 2) 1924 { 1925 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); 1926 modifier = WIDEN; 1927 1928 for (i = 0; i < gather_off_nunits; ++i) 1929 sel[i] = i | nunits; 1930 1931 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel); 1932 } 1933 else if (nunits == gather_off_nunits * 2) 1934 { 1935 unsigned char *sel = XALLOCAVEC (unsigned char, nunits); 1936 modifier = NARROW; 1937 1938 for (i = 0; i < nunits; ++i) 1939 sel[i] = i < gather_off_nunits 1940 ? i : i + nunits - gather_off_nunits; 1941 1942 perm_mask = vect_gen_perm_mask_checked (vectype, sel); 1943 ncopies *= 2; 1944 for (i = 0; i < nunits; ++i) 1945 sel[i] = i | gather_off_nunits; 1946 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel); 1947 } 1948 else 1949 gcc_unreachable (); 1950 1951 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype); 1952 1953 ptr = fold_convert (ptrtype, gather_base); 1954 if (!is_gimple_min_invariant (ptr)) 1955 { 1956 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 1957 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 1958 gcc_assert (!new_bb); 1959 } 1960 1961 scale = build_int_cst (scaletype, gather_scale); 1962 1963 prev_stmt_info = NULL; 1964 for (j = 0; j < ncopies; ++j) 1965 { 1966 if (modifier == WIDEN && (j & 1)) 1967 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, 1968 perm_mask, stmt, gsi); 1969 else if (j == 0) 1970 op = vec_oprnd0 1971 = vect_get_vec_def_for_operand (gather_off, stmt, NULL); 1972 else 1973 op = vec_oprnd0 1974 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0); 1975 1976 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 1977 { 1978 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)) 1979 == TYPE_VECTOR_SUBPARTS (idxtype)); 1980 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL); 1981 var = make_ssa_name (var); 1982 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 1983 new_stmt 1984 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 1985 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1986 op = var; 1987 } 1988 1989 if (mask_perm_mask && (j & 1)) 1990 mask_op = permute_vec_elements (mask_op, mask_op, 1991 mask_perm_mask, stmt, gsi); 1992 else 1993 { 1994 if (j == 0) 1995 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); 1996 else 1997 { 1998 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, 1999 
                                      &def_stmt, &def, &dt);
2000                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2001                }
2002
2003              mask_op = vec_mask;
2004              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005                {
2006                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2007                              == TYPE_VECTOR_SUBPARTS (masktype));
2008                  var = vect_get_new_vect_var (masktype, vect_simple_var,
2009                                               NULL);
2010                  var = make_ssa_name (var);
2011                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2012                  new_stmt
2013                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2014                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015                  mask_op = var;
2016                }
2017            }
2018
2019          new_stmt
2020            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2021                                 scale);
2022
2023          if (!useless_type_conversion_p (vectype, rettype))
2024            {
2025              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2026                          == TYPE_VECTOR_SUBPARTS (rettype));
2027              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2028              op = make_ssa_name (var, new_stmt);
2029              gimple_call_set_lhs (new_stmt, op);
2030              vect_finish_stmt_generation (stmt, new_stmt, gsi);
2031              var = make_ssa_name (vec_dest);
2032              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2033              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2034            }
2035          else
2036            {
2037              var = make_ssa_name (vec_dest, new_stmt);
2038              gimple_call_set_lhs (new_stmt, var);
2039            }
2040
2041          vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042
2043          if (modifier == NARROW)
2044            {
2045              if ((j & 1) == 0)
2046                {
2047                  prev_res = var;
2048                  continue;
2049                }
2050              var = permute_vec_elements (prev_res, var,
2051                                          perm_mask, stmt, gsi);
2052              new_stmt = SSA_NAME_DEF_STMT (var);
2053            }
2054
2055          if (prev_stmt_info == NULL)
2056            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2057          else
2058            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2059          prev_stmt_info = vinfo_for_stmt (new_stmt);
2060        }
2061
2062      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2063         from the IL.  */
2064      tree lhs = gimple_call_lhs (stmt);
2065      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2066      set_vinfo_for_stmt (new_stmt, stmt_info);
2067      set_vinfo_for_stmt (stmt, NULL);
2068      STMT_VINFO_STMT (stmt_info) = new_stmt;
2069      gsi_replace (gsi, new_stmt, true);
2070      return true;
2071    }
2072  else if (is_store)
2073    {
2074      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2075      prev_stmt_info = NULL;
2076      for (i = 0; i < ncopies; i++)
2077        {
2078          unsigned align, misalign;
2079
2080          if (i == 0)
2081            {
2082              tree rhs = gimple_call_arg (stmt, 3);
2083              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2084              vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2085              /* We should have caught mismatched types earlier.
*/ 2086 gcc_assert (useless_type_conversion_p (vectype, 2087 TREE_TYPE (vec_rhs))); 2088 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL, 2089 NULL_TREE, &dummy, gsi, 2090 &ptr_incr, false, &inv_p); 2091 gcc_assert (!inv_p); 2092 } 2093 else 2094 { 2095 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt, 2096 &def, &dt); 2097 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs); 2098 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt, 2099 &def, &dt); 2100 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); 2101 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 2102 TYPE_SIZE_UNIT (vectype)); 2103 } 2104 2105 align = TYPE_ALIGN_UNIT (vectype); 2106 if (aligned_access_p (dr)) 2107 misalign = 0; 2108 else if (DR_MISALIGNMENT (dr) == -1) 2109 { 2110 align = TYPE_ALIGN_UNIT (elem_type); 2111 misalign = 0; 2112 } 2113 else 2114 misalign = DR_MISALIGNMENT (dr); 2115 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 2116 misalign); 2117 new_stmt 2118 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr, 2119 gimple_call_arg (stmt, 1), 2120 vec_mask, vec_rhs); 2121 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2122 if (i == 0) 2123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2124 else 2125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2126 prev_stmt_info = vinfo_for_stmt (new_stmt); 2127 } 2128 } 2129 else 2130 { 2131 tree vec_mask = NULL_TREE; 2132 prev_stmt_info = NULL; 2133 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype); 2134 for (i = 0; i < ncopies; i++) 2135 { 2136 unsigned align, misalign; 2137 2138 if (i == 0) 2139 { 2140 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); 2141 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL, 2142 NULL_TREE, &dummy, gsi, 2143 &ptr_incr, false, &inv_p); 2144 gcc_assert (!inv_p); 2145 } 2146 else 2147 { 2148 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt, 2149 &def, &dt); 2150 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); 2151 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 2152 TYPE_SIZE_UNIT (vectype)); 2153 } 2154 2155 align = TYPE_ALIGN_UNIT (vectype); 2156 if (aligned_access_p (dr)) 2157 misalign = 0; 2158 else if (DR_MISALIGNMENT (dr) == -1) 2159 { 2160 align = TYPE_ALIGN_UNIT (elem_type); 2161 misalign = 0; 2162 } 2163 else 2164 misalign = DR_MISALIGNMENT (dr); 2165 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 2166 misalign); 2167 new_stmt 2168 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr, 2169 gimple_call_arg (stmt, 1), 2170 vec_mask); 2171 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest)); 2172 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2173 if (i == 0) 2174 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2175 else 2176 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2177 prev_stmt_info = vinfo_for_stmt (new_stmt); 2178 } 2179 } 2180 2181 if (!is_store) 2182 { 2183 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed 2184 from the IL. */ 2185 tree lhs = gimple_call_lhs (stmt); 2186 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); 2187 set_vinfo_for_stmt (new_stmt, stmt_info); 2188 set_vinfo_for_stmt (stmt, NULL); 2189 STMT_VINFO_STMT (stmt_info) = new_stmt; 2190 gsi_replace (gsi, new_stmt, true); 2191 } 2192 2193 return true; 2194} 2195 2196 2197/* Function vectorizable_call. 2198 2199 Check if GS performs a function call that can be vectorized. 
2200   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2201   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2202   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2203
2204static bool
2205vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2206                   slp_tree slp_node)
2207{
2208  gcall *stmt;
2209  tree vec_dest;
2210  tree scalar_dest;
2211  tree op, type;
2212  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2213  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2214  tree vectype_out, vectype_in;
2215  int nunits_in;
2216  int nunits_out;
2217  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2218  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2219  tree fndecl, new_temp, def, rhs_type;
2220  gimple def_stmt;
2221  enum vect_def_type dt[3]
2222    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2223  gimple new_stmt = NULL;
2224  int ncopies, j;
2225  vec<tree> vargs = vNULL;
2226  enum { NARROW, NONE, WIDEN } modifier;
2227  size_t i, nargs;
2228  tree lhs;
2229
2230  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2231    return false;
2232
2233  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2234    return false;
2235
2236  /* Is GS a vectorizable call?  */
2237  stmt = dyn_cast <gcall *> (gs);
2238  if (!stmt)
2239    return false;
2240
2241  if (gimple_call_internal_p (stmt)
2242      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2243          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2244    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2245                                         slp_node);
2246
2247  if (gimple_call_lhs (stmt) == NULL_TREE
2248      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2249    return false;
2250
2251  gcc_checking_assert (!stmt_can_throw_internal (stmt));
2252
2253  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254
2255  /* Process function arguments.  */
2256  rhs_type = NULL_TREE;
2257  vectype_in = NULL_TREE;
2258  nargs = gimple_call_num_args (stmt);
2259
2260  /* Bail out if the function has more than three arguments; we do not have
2261     interesting builtin functions to vectorize with more than two arguments
2262     except for fma.  No arguments is also not good.  */
2263  if (nargs == 0 || nargs > 3)
2264    return false;
2265
2266  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
2267  if (gimple_call_internal_p (stmt)
2268      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269    {
2270      nargs = 0;
2271      rhs_type = unsigned_type_node;
2272    }
2273
2274  for (i = 0; i < nargs; i++)
2275    {
2276      tree opvectype;
2277
2278      op = gimple_call_arg (stmt, i);
2279
2280      /* We can only handle calls with arguments of the same type.
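         For example (illustrative): _3 = pow (x_1, y_2) with two double
         operands passes this check, while a mixed-type call such as
         _3 = __builtin_ldexp (x_1, n_2) (double and int operands) is
         rejected here.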
*/ 2281 if (rhs_type 2282 && !types_compatible_p (rhs_type, TREE_TYPE (op))) 2283 { 2284 if (dump_enabled_p ()) 2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2286 "argument types differ.\n"); 2287 return false; 2288 } 2289 if (!rhs_type) 2290 rhs_type = TREE_TYPE (op); 2291 2292 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo, 2293 &def_stmt, &def, &dt[i], &opvectype)) 2294 { 2295 if (dump_enabled_p ()) 2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2297 "use not simple.\n"); 2298 return false; 2299 } 2300 2301 if (!vectype_in) 2302 vectype_in = opvectype; 2303 else if (opvectype 2304 && opvectype != vectype_in) 2305 { 2306 if (dump_enabled_p ()) 2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2308 "argument vector types differ.\n"); 2309 return false; 2310 } 2311 } 2312 /* If all arguments are external or constant defs use a vector type with 2313 the same size as the output vector type. */ 2314 if (!vectype_in) 2315 vectype_in = get_same_sized_vectype (rhs_type, vectype_out); 2316 if (vec_stmt) 2317 gcc_assert (vectype_in); 2318 if (!vectype_in) 2319 { 2320 if (dump_enabled_p ()) 2321 { 2322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2323 "no vectype for scalar type "); 2324 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type); 2325 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 2326 } 2327 2328 return false; 2329 } 2330 2331 /* FORNOW */ 2332 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 2333 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2334 if (nunits_in == nunits_out / 2) 2335 modifier = NARROW; 2336 else if (nunits_out == nunits_in) 2337 modifier = NONE; 2338 else if (nunits_out == nunits_in / 2) 2339 modifier = WIDEN; 2340 else 2341 return false; 2342 2343 /* For now, we only vectorize functions if a target specific builtin 2344 is available. TODO -- in some cases, it might be profitable to 2345 insert the calls for pieces of the vector, in order to be able 2346 to vectorize other operations in the loop. */ 2347 fndecl = vectorizable_function (stmt, vectype_out, vectype_in); 2348 if (fndecl == NULL_TREE) 2349 { 2350 if (gimple_call_internal_p (stmt) 2351 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE 2352 && !slp_node 2353 && loop_vinfo 2354 && LOOP_VINFO_LOOP (loop_vinfo)->simduid 2355 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME 2356 && LOOP_VINFO_LOOP (loop_vinfo)->simduid 2357 == SSA_NAME_VAR (gimple_call_arg (stmt, 0))) 2358 { 2359 /* We can handle IFN_GOMP_SIMD_LANE by returning a 2360 { 0, 1, 2, ... vf - 1 } vector. */ 2361 gcc_assert (nargs == 0); 2362 } 2363 else 2364 { 2365 if (dump_enabled_p ()) 2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2367 "function is not vectorizable.\n"); 2368 return false; 2369 } 2370 } 2371 2372 gcc_assert (!gimple_vuse (stmt)); 2373 2374 if (slp_node || PURE_SLP_STMT (stmt_info)) 2375 ncopies = 1; 2376 else if (modifier == NARROW) 2377 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 2378 else 2379 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 2380 2381 /* Sanity check: make sure that at least one copy of the vectorized stmt 2382 needs to be generated. */ 2383 gcc_assert (ncopies >= 1); 2384 2385 if (!vec_stmt) /* transformation not required. 
*/ 2386 { 2387 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 2388 if (dump_enabled_p ()) 2389 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===" 2390 "\n"); 2391 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); 2392 return true; 2393 } 2394 2395 /** Transform. **/ 2396 2397 if (dump_enabled_p ()) 2398 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 2399 2400 /* Handle def. */ 2401 scalar_dest = gimple_call_lhs (stmt); 2402 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 2403 2404 prev_stmt_info = NULL; 2405 switch (modifier) 2406 { 2407 case NONE: 2408 for (j = 0; j < ncopies; ++j) 2409 { 2410 /* Build argument list for the vectorized call. */ 2411 if (j == 0) 2412 vargs.create (nargs); 2413 else 2414 vargs.truncate (0); 2415 2416 if (slp_node) 2417 { 2418 auto_vec<vec<tree> > vec_defs (nargs); 2419 vec<tree> vec_oprnds0; 2420 2421 for (i = 0; i < nargs; i++) 2422 vargs.quick_push (gimple_call_arg (stmt, i)); 2423 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); 2424 vec_oprnds0 = vec_defs[0]; 2425 2426 /* Arguments are ready. Create the new vector stmt. */ 2427 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) 2428 { 2429 size_t k; 2430 for (k = 0; k < nargs; k++) 2431 { 2432 vec<tree> vec_oprndsk = vec_defs[k]; 2433 vargs[k] = vec_oprndsk[i]; 2434 } 2435 new_stmt = gimple_build_call_vec (fndecl, vargs); 2436 new_temp = make_ssa_name (vec_dest, new_stmt); 2437 gimple_call_set_lhs (new_stmt, new_temp); 2438 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 2440 } 2441 2442 for (i = 0; i < nargs; i++) 2443 { 2444 vec<tree> vec_oprndsi = vec_defs[i]; 2445 vec_oprndsi.release (); 2446 } 2447 continue; 2448 } 2449 2450 for (i = 0; i < nargs; i++) 2451 { 2452 op = gimple_call_arg (stmt, i); 2453 if (j == 0) 2454 vec_oprnd0 2455 = vect_get_vec_def_for_operand (op, stmt, NULL); 2456 else 2457 { 2458 vec_oprnd0 = gimple_call_arg (new_stmt, i); 2459 vec_oprnd0 2460 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 2461 } 2462 2463 vargs.quick_push (vec_oprnd0); 2464 } 2465 2466 if (gimple_call_internal_p (stmt) 2467 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE) 2468 { 2469 tree *v = XALLOCAVEC (tree, nunits_out); 2470 int k; 2471 for (k = 0; k < nunits_out; ++k) 2472 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k); 2473 tree cst = build_vector (vectype_out, v); 2474 tree new_var 2475 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_"); 2476 gimple init_stmt = gimple_build_assign (new_var, cst); 2477 new_temp = make_ssa_name (new_var, init_stmt); 2478 gimple_assign_set_lhs (init_stmt, new_temp); 2479 vect_init_vector_1 (stmt, init_stmt, NULL); 2480 new_temp = make_ssa_name (vec_dest); 2481 new_stmt = gimple_build_assign (new_temp, 2482 gimple_assign_lhs (init_stmt)); 2483 } 2484 else 2485 { 2486 new_stmt = gimple_build_call_vec (fndecl, vargs); 2487 new_temp = make_ssa_name (vec_dest, new_stmt); 2488 gimple_call_set_lhs (new_stmt, new_temp); 2489 } 2490 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2491 2492 if (j == 0) 2493 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2494 else 2495 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2496 2497 prev_stmt_info = vinfo_for_stmt (new_stmt); 2498 } 2499 2500 break; 2501 2502 case NARROW: 2503 for (j = 0; j < ncopies; ++j) 2504 { 2505 /* Build argument list for the vectorized call. 
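             In this NARROW case each vector produced consumes two input
             vectors, so two vector defs are pushed per scalar argument
             (hence the nargs * 2 allocation below); e.g. two V4SI operand
             vectors feed one V8HI result (illustrative types).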
*/ 2506 if (j == 0) 2507 vargs.create (nargs * 2); 2508 else 2509 vargs.truncate (0); 2510 2511 if (slp_node) 2512 { 2513 auto_vec<vec<tree> > vec_defs (nargs); 2514 vec<tree> vec_oprnds0; 2515 2516 for (i = 0; i < nargs; i++) 2517 vargs.quick_push (gimple_call_arg (stmt, i)); 2518 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); 2519 vec_oprnds0 = vec_defs[0]; 2520 2521 /* Arguments are ready. Create the new vector stmt. */ 2522 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2) 2523 { 2524 size_t k; 2525 vargs.truncate (0); 2526 for (k = 0; k < nargs; k++) 2527 { 2528 vec<tree> vec_oprndsk = vec_defs[k]; 2529 vargs.quick_push (vec_oprndsk[i]); 2530 vargs.quick_push (vec_oprndsk[i + 1]); 2531 } 2532 new_stmt = gimple_build_call_vec (fndecl, vargs); 2533 new_temp = make_ssa_name (vec_dest, new_stmt); 2534 gimple_call_set_lhs (new_stmt, new_temp); 2535 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 2537 } 2538 2539 for (i = 0; i < nargs; i++) 2540 { 2541 vec<tree> vec_oprndsi = vec_defs[i]; 2542 vec_oprndsi.release (); 2543 } 2544 continue; 2545 } 2546 2547 for (i = 0; i < nargs; i++) 2548 { 2549 op = gimple_call_arg (stmt, i); 2550 if (j == 0) 2551 { 2552 vec_oprnd0 2553 = vect_get_vec_def_for_operand (op, stmt, NULL); 2554 vec_oprnd1 2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 2556 } 2557 else 2558 { 2559 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1); 2560 vec_oprnd0 2561 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1); 2562 vec_oprnd1 2563 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 2564 } 2565 2566 vargs.quick_push (vec_oprnd0); 2567 vargs.quick_push (vec_oprnd1); 2568 } 2569 2570 new_stmt = gimple_build_call_vec (fndecl, vargs); 2571 new_temp = make_ssa_name (vec_dest, new_stmt); 2572 gimple_call_set_lhs (new_stmt, new_temp); 2573 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2574 2575 if (j == 0) 2576 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 2577 else 2578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2579 2580 prev_stmt_info = vinfo_for_stmt (new_stmt); 2581 } 2582 2583 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 2584 2585 break; 2586 2587 case WIDEN: 2588 /* No current target implements this case. */ 2589 return false; 2590 } 2591 2592 vargs.release (); 2593 2594 /* The call in STMT might prevent it from being removed in dce. 2595 We however cannot remove it here, due to the way the ssa name 2596 it defines is mapped to the new definition. So just replace 2597 rhs of the statement with something harmless. */ 2598 2599 if (slp_node) 2600 return true; 2601 2602 type = TREE_TYPE (scalar_dest); 2603 if (is_pattern_stmt_p (stmt_info)) 2604 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); 2605 else 2606 lhs = gimple_call_lhs (stmt); 2607 new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 2608 set_vinfo_for_stmt (new_stmt, stmt_info); 2609 set_vinfo_for_stmt (stmt, NULL); 2610 STMT_VINFO_STMT (stmt_info) = new_stmt; 2611 gsi_replace (gsi, new_stmt, false); 2612 2613 return true; 2614} 2615 2616 2617struct simd_call_arg_info 2618{ 2619 tree vectype; 2620 tree op; 2621 enum vect_def_type dt; 2622 HOST_WIDE_INT linear_step; 2623 unsigned int align; 2624}; 2625 2626/* Function vectorizable_simd_clone_call. 2627 2628 Check if STMT performs a function call that can be vectorized 2629 by calling a simd clone of the function. 
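   (For example, with a hypothetical vector-ABI mangling: a call
   foo (x_1) in the loop may be replaced by a call to the clone
   _ZGVbN4v_foo (vect_x_2), provided foo was declared with
   '#pragma omp declare simd' and a matching four-lane clone exists.)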
2630   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2631   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2632   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2633
2634static bool
2635vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2636                              gimple *vec_stmt, slp_tree slp_node)
2637{
2638  tree vec_dest;
2639  tree scalar_dest;
2640  tree op, type;
2641  tree vec_oprnd0 = NULL_TREE;
2642  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2643  tree vectype;
2644  unsigned int nunits;
2645  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2647  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2648  tree fndecl, new_temp, def;
2649  gimple def_stmt;
2650  gimple new_stmt = NULL;
2651  int ncopies, j;
2652  vec<simd_call_arg_info> arginfo = vNULL;
2653  vec<tree> vargs = vNULL;
2654  size_t i, nargs;
2655  tree lhs, rtype, ratype;
2656  vec<constructor_elt, va_gc> *ret_ctor_elts;
2657
2658  /* Is STMT a vectorizable call?  */
2659  if (!is_gimple_call (stmt))
2660    return false;
2661
2662  fndecl = gimple_call_fndecl (stmt);
2663  if (fndecl == NULL_TREE)
2664    return false;
2665
2666  struct cgraph_node *node = cgraph_node::get (fndecl);
2667  if (node == NULL || node->simd_clones == NULL)
2668    return false;
2669
2670  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2671    return false;
2672
2673  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2674    return false;
2675
2676  if (gimple_call_lhs (stmt)
2677      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2678    return false;
2679
2680  gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681
2682  vectype = STMT_VINFO_VECTYPE (stmt_info);
2683
2684  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2685    return false;
2686
2687  /* FORNOW */
2688  if (slp_node || PURE_SLP_STMT (stmt_info))
2689    return false;
2690
2691  /* Process function arguments.  */
2692  nargs = gimple_call_num_args (stmt);
2693
2694  /* Bail out if the function has zero arguments.  */
2695  if (nargs == 0)
2696    return false;
2697
2698  arginfo.create (nargs);
2699
2700  for (i = 0; i < nargs; i++)
2701    {
2702      simd_call_arg_info thisarginfo;
2703      affine_iv iv;
2704
2705      thisarginfo.linear_step = 0;
2706      thisarginfo.align = 0;
2707      thisarginfo.op = NULL_TREE;
2708
2709      op = gimple_call_arg (stmt, i);
2710      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2711                                 &def_stmt, &def, &thisarginfo.dt,
2712                                 &thisarginfo.vectype)
2713          || thisarginfo.dt == vect_uninitialized_def)
2714        {
2715          if (dump_enabled_p ())
2716            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717                             "use not simple.\n");
2718          arginfo.release ();
2719          return false;
2720        }
2721
2722      if (thisarginfo.dt == vect_constant_def
2723          || thisarginfo.dt == vect_external_def)
2724        gcc_assert (thisarginfo.vectype == NULL_TREE);
2725      else
2726        gcc_assert (thisarginfo.vectype != NULL_TREE);
2727
2728      /* For linear arguments, the analyze phase should have saved
2729         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
2730      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2731          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732        {
2733          gcc_assert (vec_stmt);
2734          thisarginfo.linear_step
2735            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2736          thisarginfo.op
2737            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2738          /* If the loop has been peeled for alignment, we need to adjust it.
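             The base recorded at analysis time corresponds to the
             unpeeled iteration count N1; after peeling, N2 iterations
             remain, so the base must be advanced by (N1 - N2) * STEP.
             E.g. (illustrative) with n1 == 100, n2 == 97 and a linear
             step of 4, the saved op is biased by 12.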
*/ 2739 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo); 2740 tree n2 = LOOP_VINFO_NITERS (loop_vinfo); 2741 if (n1 != n2) 2742 { 2743 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2); 2744 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]; 2745 tree opt = TREE_TYPE (thisarginfo.op); 2746 bias = fold_convert (TREE_TYPE (step), bias); 2747 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step); 2748 thisarginfo.op 2749 = fold_build2 (POINTER_TYPE_P (opt) 2750 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt, 2751 thisarginfo.op, bias); 2752 } 2753 } 2754 else if (!vec_stmt 2755 && thisarginfo.dt != vect_constant_def 2756 && thisarginfo.dt != vect_external_def 2757 && loop_vinfo 2758 && TREE_CODE (op) == SSA_NAME 2759 && simple_iv (loop, loop_containing_stmt (stmt), op, 2760 &iv, false) 2761 && tree_fits_shwi_p (iv.step)) 2762 { 2763 thisarginfo.linear_step = tree_to_shwi (iv.step); 2764 thisarginfo.op = iv.base; 2765 } 2766 else if ((thisarginfo.dt == vect_constant_def 2767 || thisarginfo.dt == vect_external_def) 2768 && POINTER_TYPE_P (TREE_TYPE (op))) 2769 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; 2770 2771 arginfo.quick_push (thisarginfo); 2772 } 2773 2774 unsigned int badness = 0; 2775 struct cgraph_node *bestn = NULL; 2776 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ()) 2777 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]); 2778 else 2779 for (struct cgraph_node *n = node->simd_clones; n != NULL; 2780 n = n->simdclone->next_clone) 2781 { 2782 unsigned int this_badness = 0; 2783 if (n->simdclone->simdlen 2784 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) 2785 || n->simdclone->nargs != nargs) 2786 continue; 2787 if (n->simdclone->simdlen 2788 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)) 2789 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)) 2790 - exact_log2 (n->simdclone->simdlen)) * 1024; 2791 if (n->simdclone->inbranch) 2792 this_badness += 2048; 2793 int target_badness = targetm.simd_clone.usable (n); 2794 if (target_badness < 0) 2795 continue; 2796 this_badness += target_badness * 512; 2797 /* FORNOW: Have to add code to add the mask argument. 
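         (As a worked example of the badness scoring above: with a
         vectorization factor of 8, a simdlen-4 clone accrues
         (log2 (8) - log2 (4)) * 1024 == 1024, and an inbranch clone
         would add 2048 more; inbranch clones are skipped below until
         mask arguments are emitted.)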
*/ 2798 if (n->simdclone->inbranch) 2799 continue; 2800 for (i = 0; i < nargs; i++) 2801 { 2802 switch (n->simdclone->args[i].arg_type) 2803 { 2804 case SIMD_CLONE_ARG_TYPE_VECTOR: 2805 if (!useless_type_conversion_p 2806 (n->simdclone->args[i].orig_type, 2807 TREE_TYPE (gimple_call_arg (stmt, i)))) 2808 i = -1; 2809 else if (arginfo[i].dt == vect_constant_def 2810 || arginfo[i].dt == vect_external_def 2811 || arginfo[i].linear_step) 2812 this_badness += 64; 2813 break; 2814 case SIMD_CLONE_ARG_TYPE_UNIFORM: 2815 if (arginfo[i].dt != vect_constant_def 2816 && arginfo[i].dt != vect_external_def) 2817 i = -1; 2818 break; 2819 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 2820 if (arginfo[i].dt == vect_constant_def 2821 || arginfo[i].dt == vect_external_def 2822 || (arginfo[i].linear_step 2823 != n->simdclone->args[i].linear_step)) 2824 i = -1; 2825 break; 2826 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 2827 /* FORNOW */ 2828 i = -1; 2829 break; 2830 case SIMD_CLONE_ARG_TYPE_MASK: 2831 gcc_unreachable (); 2832 } 2833 if (i == (size_t) -1) 2834 break; 2835 if (n->simdclone->args[i].alignment > arginfo[i].align) 2836 { 2837 i = -1; 2838 break; 2839 } 2840 if (arginfo[i].align) 2841 this_badness += (exact_log2 (arginfo[i].align) 2842 - exact_log2 (n->simdclone->args[i].alignment)); 2843 } 2844 if (i == (size_t) -1) 2845 continue; 2846 if (bestn == NULL || this_badness < badness) 2847 { 2848 bestn = n; 2849 badness = this_badness; 2850 } 2851 } 2852 2853 if (bestn == NULL) 2854 { 2855 arginfo.release (); 2856 return false; 2857 } 2858 2859 for (i = 0; i < nargs; i++) 2860 if ((arginfo[i].dt == vect_constant_def 2861 || arginfo[i].dt == vect_external_def) 2862 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) 2863 { 2864 arginfo[i].vectype 2865 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt, 2866 i))); 2867 if (arginfo[i].vectype == NULL 2868 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) 2869 > bestn->simdclone->simdlen)) 2870 { 2871 arginfo.release (); 2872 return false; 2873 } 2874 } 2875 2876 fndecl = bestn->decl; 2877 nunits = bestn->simdclone->simdlen; 2878 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 2879 2880 /* If the function isn't const, only allow it in simd loops where user 2881 has asserted that at least nunits consecutive iterations can be 2882 performed using SIMD instructions. */ 2883 if ((loop == NULL || (unsigned) loop->safelen < nunits) 2884 && gimple_vuse (stmt)) 2885 { 2886 arginfo.release (); 2887 return false; 2888 } 2889 2890 /* Sanity check: make sure that at least one copy of the vectorized stmt 2891 needs to be generated. */ 2892 gcc_assert (ncopies >= 1); 2893 2894 if (!vec_stmt) /* transformation not required. */ 2895 { 2896 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl); 2897 for (i = 0; i < nargs; i++) 2898 if (bestn->simdclone->args[i].arg_type 2899 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) 2900 { 2901 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2 2902 + 1); 2903 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op); 2904 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op)) 2905 ? 
size_type_node : TREE_TYPE (arginfo[i].op); 2906 tree ls = build_int_cst (lst, arginfo[i].linear_step); 2907 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls); 2908 } 2909 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; 2910 if (dump_enabled_p ()) 2911 dump_printf_loc (MSG_NOTE, vect_location, 2912 "=== vectorizable_simd_clone_call ===\n"); 2913/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */ 2914 arginfo.release (); 2915 return true; 2916 } 2917 2918 /** Transform. **/ 2919 2920 if (dump_enabled_p ()) 2921 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 2922 2923 /* Handle def. */ 2924 scalar_dest = gimple_call_lhs (stmt); 2925 vec_dest = NULL_TREE; 2926 rtype = NULL_TREE; 2927 ratype = NULL_TREE; 2928 if (scalar_dest) 2929 { 2930 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2931 rtype = TREE_TYPE (TREE_TYPE (fndecl)); 2932 if (TREE_CODE (rtype) == ARRAY_TYPE) 2933 { 2934 ratype = rtype; 2935 rtype = TREE_TYPE (ratype); 2936 } 2937 } 2938 2939 prev_stmt_info = NULL; 2940 for (j = 0; j < ncopies; ++j) 2941 { 2942 /* Build argument list for the vectorized call. */ 2943 if (j == 0) 2944 vargs.create (nargs); 2945 else 2946 vargs.truncate (0); 2947 2948 for (i = 0; i < nargs; i++) 2949 { 2950 unsigned int k, l, m, o; 2951 tree atype; 2952 op = gimple_call_arg (stmt, i); 2953 switch (bestn->simdclone->args[i].arg_type) 2954 { 2955 case SIMD_CLONE_ARG_TYPE_VECTOR: 2956 atype = bestn->simdclone->args[i].vector_type; 2957 o = nunits / TYPE_VECTOR_SUBPARTS (atype); 2958 for (m = j * o; m < (j + 1) * o; m++) 2959 { 2960 if (TYPE_VECTOR_SUBPARTS (atype) 2961 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)) 2962 { 2963 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); 2964 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) 2965 / TYPE_VECTOR_SUBPARTS (atype)); 2966 gcc_assert ((k & (k - 1)) == 0); 2967 if (m == 0) 2968 vec_oprnd0 2969 = vect_get_vec_def_for_operand (op, stmt, NULL); 2970 else 2971 { 2972 vec_oprnd0 = arginfo[i].op; 2973 if ((m & (k - 1)) == 0) 2974 vec_oprnd0 2975 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, 2976 vec_oprnd0); 2977 } 2978 arginfo[i].op = vec_oprnd0; 2979 vec_oprnd0 2980 = build3 (BIT_FIELD_REF, atype, vec_oprnd0, 2981 size_int (prec), 2982 bitsize_int ((m & (k - 1)) * prec)); 2983 new_stmt 2984 = gimple_build_assign (make_ssa_name (atype), 2985 vec_oprnd0); 2986 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2987 vargs.safe_push (gimple_assign_lhs (new_stmt)); 2988 } 2989 else 2990 { 2991 k = (TYPE_VECTOR_SUBPARTS (atype) 2992 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)); 2993 gcc_assert ((k & (k - 1)) == 0); 2994 vec<constructor_elt, va_gc> *ctor_elts; 2995 if (k != 1) 2996 vec_alloc (ctor_elts, k); 2997 else 2998 ctor_elts = NULL; 2999 for (l = 0; l < k; l++) 3000 { 3001 if (m == 0 && l == 0) 3002 vec_oprnd0 3003 = vect_get_vec_def_for_operand (op, stmt, NULL); 3004 else 3005 vec_oprnd0 3006 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, 3007 arginfo[i].op); 3008 arginfo[i].op = vec_oprnd0; 3009 if (k == 1) 3010 break; 3011 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, 3012 vec_oprnd0); 3013 } 3014 if (k == 1) 3015 vargs.safe_push (vec_oprnd0); 3016 else 3017 { 3018 vec_oprnd0 = build_constructor (atype, ctor_elts); 3019 new_stmt 3020 = gimple_build_assign (make_ssa_name (atype), 3021 vec_oprnd0); 3022 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3023 vargs.safe_push (gimple_assign_lhs (new_stmt)); 3024 } 3025 } 3026 } 3027 break; 3028 case SIMD_CLONE_ARG_TYPE_UNIFORM: 3029 
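              /* A 'uniform' argument has the same value in every SIMD
                 lane, so the scalar OP can be passed to the clone
                 unchanged.  */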
vargs.safe_push (op); 3030 break; 3031 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 3032 if (j == 0) 3033 { 3034 gimple_seq stmts; 3035 arginfo[i].op 3036 = force_gimple_operand (arginfo[i].op, &stmts, true, 3037 NULL_TREE); 3038 if (stmts != NULL) 3039 { 3040 basic_block new_bb; 3041 edge pe = loop_preheader_edge (loop); 3042 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 3043 gcc_assert (!new_bb); 3044 } 3045 tree phi_res = copy_ssa_name (op); 3046 gphi *new_phi = create_phi_node (phi_res, loop->header); 3047 set_vinfo_for_stmt (new_phi, 3048 new_stmt_vec_info (new_phi, loop_vinfo, 3049 NULL)); 3050 add_phi_arg (new_phi, arginfo[i].op, 3051 loop_preheader_edge (loop), UNKNOWN_LOCATION); 3052 enum tree_code code 3053 = POINTER_TYPE_P (TREE_TYPE (op)) 3054 ? POINTER_PLUS_EXPR : PLUS_EXPR; 3055 tree type = POINTER_TYPE_P (TREE_TYPE (op)) 3056 ? sizetype : TREE_TYPE (op); 3057 widest_int cst 3058 = wi::mul (bestn->simdclone->args[i].linear_step, 3059 ncopies * nunits); 3060 tree tcst = wide_int_to_tree (type, cst); 3061 tree phi_arg = copy_ssa_name (op); 3062 new_stmt 3063 = gimple_build_assign (phi_arg, code, phi_res, tcst); 3064 gimple_stmt_iterator si = gsi_after_labels (loop->header); 3065 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); 3066 set_vinfo_for_stmt (new_stmt, 3067 new_stmt_vec_info (new_stmt, loop_vinfo, 3068 NULL)); 3069 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), 3070 UNKNOWN_LOCATION); 3071 arginfo[i].op = phi_res; 3072 vargs.safe_push (phi_res); 3073 } 3074 else 3075 { 3076 enum tree_code code 3077 = POINTER_TYPE_P (TREE_TYPE (op)) 3078 ? POINTER_PLUS_EXPR : PLUS_EXPR; 3079 tree type = POINTER_TYPE_P (TREE_TYPE (op)) 3080 ? sizetype : TREE_TYPE (op); 3081 widest_int cst 3082 = wi::mul (bestn->simdclone->args[i].linear_step, 3083 j * nunits); 3084 tree tcst = wide_int_to_tree (type, cst); 3085 new_temp = make_ssa_name (TREE_TYPE (op)); 3086 new_stmt = gimple_build_assign (new_temp, code, 3087 arginfo[i].op, tcst); 3088 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3089 vargs.safe_push (new_temp); 3090 } 3091 break; 3092 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 3093 default: 3094 gcc_unreachable (); 3095 } 3096 } 3097 3098 new_stmt = gimple_build_call_vec (fndecl, vargs); 3099 if (vec_dest) 3100 { 3101 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits); 3102 if (ratype) 3103 new_temp = create_tmp_var (ratype); 3104 else if (TYPE_VECTOR_SUBPARTS (vectype) 3105 == TYPE_VECTOR_SUBPARTS (rtype)) 3106 new_temp = make_ssa_name (vec_dest, new_stmt); 3107 else 3108 new_temp = make_ssa_name (rtype, new_stmt); 3109 gimple_call_set_lhs (new_stmt, new_temp); 3110 } 3111 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3112 3113 if (vec_dest) 3114 { 3115 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits) 3116 { 3117 unsigned int k, l; 3118 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); 3119 k = nunits / TYPE_VECTOR_SUBPARTS (vectype); 3120 gcc_assert ((k & (k - 1)) == 0); 3121 for (l = 0; l < k; l++) 3122 { 3123 tree t; 3124 if (ratype) 3125 { 3126 t = build_fold_addr_expr (new_temp); 3127 t = build2 (MEM_REF, vectype, t, 3128 build_int_cst (TREE_TYPE (t), 3129 l * prec / BITS_PER_UNIT)); 3130 } 3131 else 3132 t = build3 (BIT_FIELD_REF, vectype, new_temp, 3133 size_int (prec), bitsize_int (l * prec)); 3134 new_stmt 3135 = gimple_build_assign (make_ssa_name (vectype), t); 3136 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3137 if (j == 0 && l == 0) 3138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3139 else 3140 
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3141 3142 prev_stmt_info = vinfo_for_stmt (new_stmt); 3143 } 3144 3145 if (ratype) 3146 { 3147 tree clobber = build_constructor (ratype, NULL); 3148 TREE_THIS_VOLATILE (clobber) = 1; 3149 new_stmt = gimple_build_assign (new_temp, clobber); 3150 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3151 } 3152 continue; 3153 } 3154 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits) 3155 { 3156 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype) 3157 / TYPE_VECTOR_SUBPARTS (rtype)); 3158 gcc_assert ((k & (k - 1)) == 0); 3159 if ((j & (k - 1)) == 0) 3160 vec_alloc (ret_ctor_elts, k); 3161 if (ratype) 3162 { 3163 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype); 3164 for (m = 0; m < o; m++) 3165 { 3166 tree tem = build4 (ARRAY_REF, rtype, new_temp, 3167 size_int (m), NULL_TREE, NULL_TREE); 3168 new_stmt 3169 = gimple_build_assign (make_ssa_name (rtype), tem); 3170 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3171 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, 3172 gimple_assign_lhs (new_stmt)); 3173 } 3174 tree clobber = build_constructor (ratype, NULL); 3175 TREE_THIS_VOLATILE (clobber) = 1; 3176 new_stmt = gimple_build_assign (new_temp, clobber); 3177 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3178 } 3179 else 3180 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); 3181 if ((j & (k - 1)) != k - 1) 3182 continue; 3183 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); 3184 new_stmt 3185 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0); 3186 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3187 3188 if ((unsigned) j == k - 1) 3189 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3190 else 3191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3192 3193 prev_stmt_info = vinfo_for_stmt (new_stmt); 3194 continue; 3195 } 3196 else if (ratype) 3197 { 3198 tree t = build_fold_addr_expr (new_temp); 3199 t = build2 (MEM_REF, vectype, t, 3200 build_int_cst (TREE_TYPE (t), 0)); 3201 new_stmt 3202 = gimple_build_assign (make_ssa_name (vec_dest), t); 3203 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3204 tree clobber = build_constructor (ratype, NULL); 3205 TREE_THIS_VOLATILE (clobber) = 1; 3206 vect_finish_stmt_generation (stmt, 3207 gimple_build_assign (new_temp, 3208 clobber), gsi); 3209 } 3210 } 3211 3212 if (j == 0) 3213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3214 else 3215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3216 3217 prev_stmt_info = vinfo_for_stmt (new_stmt); 3218 } 3219 3220 vargs.release (); 3221 3222 /* The call in STMT might prevent it from being removed in dce. 3223 We however cannot remove it here, due to the way the ssa name 3224 it defines is mapped to the new definition. So just replace 3225 rhs of the statement with something harmless. 
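     (I.e. a now-redundant scalar call lhs_1 = foo (x_2) is rewritten to
     lhs_1 = 0, which keeps the SSA definition in place and is trivially
     dead afterwards; illustrative SSA names.)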
*/ 3226 3227 if (slp_node) 3228 return true; 3229 3230 if (scalar_dest) 3231 { 3232 type = TREE_TYPE (scalar_dest); 3233 if (is_pattern_stmt_p (stmt_info)) 3234 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); 3235 else 3236 lhs = gimple_call_lhs (stmt); 3237 new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 3238 } 3239 else 3240 new_stmt = gimple_build_nop (); 3241 set_vinfo_for_stmt (new_stmt, stmt_info); 3242 set_vinfo_for_stmt (stmt, NULL); 3243 STMT_VINFO_STMT (stmt_info) = new_stmt; 3244 gsi_replace (gsi, new_stmt, true); 3245 unlink_stmt_vdef (stmt); 3246 3247 return true; 3248} 3249 3250 3251/* Function vect_gen_widened_results_half 3252 3253 Create a vector stmt whose code, type, number of arguments, and result 3254 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 3255 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. 3256 In the case that CODE is a CALL_EXPR, this means that a call to DECL 3257 needs to be created (DECL is a function-decl of a target-builtin). 3258 STMT is the original scalar stmt that we are vectorizing. */ 3259 3260static gimple 3261vect_gen_widened_results_half (enum tree_code code, 3262 tree decl, 3263 tree vec_oprnd0, tree vec_oprnd1, int op_type, 3264 tree vec_dest, gimple_stmt_iterator *gsi, 3265 gimple stmt) 3266{ 3267 gimple new_stmt; 3268 tree new_temp; 3269 3270 /* Generate half of the widened result: */ 3271 if (code == CALL_EXPR) 3272 { 3273 /* Target specific support */ 3274 if (op_type == binary_op) 3275 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1); 3276 else 3277 new_stmt = gimple_build_call (decl, 1, vec_oprnd0); 3278 new_temp = make_ssa_name (vec_dest, new_stmt); 3279 gimple_call_set_lhs (new_stmt, new_temp); 3280 } 3281 else 3282 { 3283 /* Generic support */ 3284 gcc_assert (op_type == TREE_CODE_LENGTH (code)); 3285 if (op_type != binary_op) 3286 vec_oprnd1 = NULL; 3287 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); 3288 new_temp = make_ssa_name (vec_dest, new_stmt); 3289 gimple_assign_set_lhs (new_stmt, new_temp); 3290 } 3291 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3292 3293 return new_stmt; 3294} 3295 3296 3297/* Get vectorized definitions for loop-based vectorization. For the first 3298 operand we call vect_get_vec_def_for_operand() (with OPRND containing 3299 scalar operand), and for the rest we get a copy with 3300 vect_get_vec_def_for_stmt_copy() using the previous vector definition 3301 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details. 3302 The vectors are collected into VEC_OPRNDS. */ 3303 3304static void 3305vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt, 3306 vec<tree> *vec_oprnds, int multi_step_cvt) 3307{ 3308 tree vec_oprnd; 3309 3310 /* Get first vector operand. */ 3311 /* All the vector operands except the very first one (that is scalar oprnd) 3312 are stmt copies. */ 3313 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE) 3314 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL); 3315 else 3316 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd); 3317 3318 vec_oprnds->quick_push (vec_oprnd); 3319 3320 /* Get second vector operand. */ 3321 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd); 3322 vec_oprnds->quick_push (vec_oprnd); 3323 3324 *oprnd = vec_oprnd; 3325 3326 /* For conversion in multiple steps, continue to get operands 3327 recursively. 
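     For instance, with MULTI_STEP_CVT == 1 the current invocation has
     pushed one pair of vector defs and the recursive call below pushes
     a second pair, so four defs are collected for a single scalar
     operand (illustrative count).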
*/ 3328 if (multi_step_cvt) 3329 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1); 3330} 3331 3332 3333/* Create vectorized demotion statements for vector operands from VEC_OPRNDS. 3334 For multi-step conversions store the resulting vectors and call the function 3335 recursively. */ 3336 3337static void 3338vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds, 3339 int multi_step_cvt, gimple stmt, 3340 vec<tree> vec_dsts, 3341 gimple_stmt_iterator *gsi, 3342 slp_tree slp_node, enum tree_code code, 3343 stmt_vec_info *prev_stmt_info) 3344{ 3345 unsigned int i; 3346 tree vop0, vop1, new_tmp, vec_dest; 3347 gimple new_stmt; 3348 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3349 3350 vec_dest = vec_dsts.pop (); 3351 3352 for (i = 0; i < vec_oprnds->length (); i += 2) 3353 { 3354 /* Create demotion operation. */ 3355 vop0 = (*vec_oprnds)[i]; 3356 vop1 = (*vec_oprnds)[i + 1]; 3357 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 3358 new_tmp = make_ssa_name (vec_dest, new_stmt); 3359 gimple_assign_set_lhs (new_stmt, new_tmp); 3360 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3361 3362 if (multi_step_cvt) 3363 /* Store the resulting vector for next recursive call. */ 3364 (*vec_oprnds)[i/2] = new_tmp; 3365 else 3366 { 3367 /* This is the last step of the conversion sequence. Store the 3368 vectors in SLP_NODE or in vector info of the scalar statement 3369 (or in STMT_VINFO_RELATED_STMT chain). */ 3370 if (slp_node) 3371 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3372 else 3373 { 3374 if (!*prev_stmt_info) 3375 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 3376 else 3377 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt; 3378 3379 *prev_stmt_info = vinfo_for_stmt (new_stmt); 3380 } 3381 } 3382 } 3383 3384 /* For multi-step demotion operations we first generate demotion operations 3385 from the source type to the intermediate types, and then combine the 3386 results (stored in VEC_OPRNDS) in demotion operation to the destination 3387 type. */ 3388 if (multi_step_cvt) 3389 { 3390 /* At each level of recursion we have half of the operands we had at the 3391 previous level. */ 3392 vec_oprnds->truncate ((i+1)/2); 3393 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1, 3394 stmt, vec_dsts, gsi, slp_node, 3395 VEC_PACK_TRUNC_EXPR, 3396 prev_stmt_info); 3397 } 3398 3399 vec_dsts.quick_push (vec_dest); 3400} 3401 3402 3403/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 3404 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store 3405 the resulting vectors and call the function recursively. */ 3406 3407static void 3408vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0, 3409 vec<tree> *vec_oprnds1, 3410 gimple stmt, tree vec_dest, 3411 gimple_stmt_iterator *gsi, 3412 enum tree_code code1, 3413 enum tree_code code2, tree decl1, 3414 tree decl2, int op_type) 3415{ 3416 int i; 3417 tree vop0, vop1, new_tmp1, new_tmp2; 3418 gimple new_stmt1, new_stmt2; 3419 vec<tree> vec_tmp = vNULL; 3420 3421 vec_tmp.create (vec_oprnds0->length () * 2); 3422 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) 3423 { 3424 if (op_type == binary_op) 3425 vop1 = (*vec_oprnds1)[i]; 3426 else 3427 vop1 = NULL_TREE; 3428 3429 /* Generate the two halves of promotion operation. 
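         E.g. (illustrative) for a widening multiplication of V8HI
         operands, CODE1/CODE2 are typically VEC_WIDEN_MULT_LO_EXPR and
         VEC_WIDEN_MULT_HI_EXPR, each producing a V4SI vector that holds
         one half of the widened products.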
*/ 3430 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1, 3431 op_type, vec_dest, gsi, stmt); 3432 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1, 3433 op_type, vec_dest, gsi, stmt); 3434 if (is_gimple_call (new_stmt1)) 3435 { 3436 new_tmp1 = gimple_call_lhs (new_stmt1); 3437 new_tmp2 = gimple_call_lhs (new_stmt2); 3438 } 3439 else 3440 { 3441 new_tmp1 = gimple_assign_lhs (new_stmt1); 3442 new_tmp2 = gimple_assign_lhs (new_stmt2); 3443 } 3444 3445 /* Store the results for the next step. */ 3446 vec_tmp.quick_push (new_tmp1); 3447 vec_tmp.quick_push (new_tmp2); 3448 } 3449 3450 vec_oprnds0->release (); 3451 *vec_oprnds0 = vec_tmp; 3452} 3453 3454 3455/* Check if STMT performs a conversion operation, that can be vectorized. 3456 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3457 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 3458 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 3459 3460static bool 3461vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, 3462 gimple *vec_stmt, slp_tree slp_node) 3463{ 3464 tree vec_dest; 3465 tree scalar_dest; 3466 tree op0, op1 = NULL_TREE; 3467 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 3468 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3469 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3470 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 3471 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; 3472 tree decl1 = NULL_TREE, decl2 = NULL_TREE; 3473 tree new_temp; 3474 tree def; 3475 gimple def_stmt; 3476 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 3477 gimple new_stmt = NULL; 3478 stmt_vec_info prev_stmt_info; 3479 int nunits_in; 3480 int nunits_out; 3481 tree vectype_out, vectype_in; 3482 int ncopies, i, j; 3483 tree lhs_type, rhs_type; 3484 enum { NARROW, NONE, WIDEN } modifier; 3485 vec<tree> vec_oprnds0 = vNULL; 3486 vec<tree> vec_oprnds1 = vNULL; 3487 tree vop0; 3488 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3489 int multi_step_cvt = 0; 3490 vec<tree> vec_dsts = vNULL; 3491 vec<tree> interm_types = vNULL; 3492 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE; 3493 int op_type; 3494 machine_mode rhs_mode; 3495 unsigned short fltsz; 3496 3497 /* Is STMT a vectorizable conversion? */ 3498 3499 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3500 return false; 3501 3502 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 3503 return false; 3504 3505 if (!is_gimple_assign (stmt)) 3506 return false; 3507 3508 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 3509 return false; 3510 3511 code = gimple_assign_rhs_code (stmt); 3512 if (!CONVERT_EXPR_CODE_P (code) 3513 && code != FIX_TRUNC_EXPR 3514 && code != FLOAT_EXPR 3515 && code != WIDEN_MULT_EXPR 3516 && code != WIDEN_LSHIFT_EXPR) 3517 return false; 3518 3519 op_type = TREE_CODE_LENGTH (code); 3520 3521 /* Check types of lhs and rhs. 
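     E.g. int -> long conversions (NOP_EXPR/CONVERT_EXPR) and
     float -> int (FIX_TRUNC_EXPR) pass the checks below, while integral
     types whose precision differs from their mode's precision (say, a
     3-bit bit-field type) are rejected (illustrative cases).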
*/ 3522 scalar_dest = gimple_assign_lhs (stmt); 3523 lhs_type = TREE_TYPE (scalar_dest); 3524 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 3525 3526 op0 = gimple_assign_rhs1 (stmt); 3527 rhs_type = TREE_TYPE (op0); 3528 3529 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 3530 && !((INTEGRAL_TYPE_P (lhs_type) 3531 && INTEGRAL_TYPE_P (rhs_type)) 3532 || (SCALAR_FLOAT_TYPE_P (lhs_type) 3533 && SCALAR_FLOAT_TYPE_P (rhs_type)))) 3534 return false; 3535 3536 if ((INTEGRAL_TYPE_P (lhs_type) 3537 && (TYPE_PRECISION (lhs_type) 3538 != GET_MODE_PRECISION (TYPE_MODE (lhs_type)))) 3539 || (INTEGRAL_TYPE_P (rhs_type) 3540 && (TYPE_PRECISION (rhs_type) 3541 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))) 3542 { 3543 if (dump_enabled_p ()) 3544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3545 "type conversion to/from bit-precision unsupported." 3546 "\n"); 3547 return false; 3548 } 3549 3550 /* Check the operands of the operation. */ 3551 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo, 3552 &def_stmt, &def, &dt[0], &vectype_in)) 3553 { 3554 if (dump_enabled_p ()) 3555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3556 "use not simple.\n"); 3557 return false; 3558 } 3559 if (op_type == binary_op) 3560 { 3561 bool ok; 3562 3563 op1 = gimple_assign_rhs2 (stmt); 3564 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR); 3565 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of 3566 OP1. */ 3567 if (CONSTANT_CLASS_P (op0)) 3568 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, 3569 &def_stmt, &def, &dt[1], &vectype_in); 3570 else 3571 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt, 3572 &def, &dt[1]); 3573 3574 if (!ok) 3575 { 3576 if (dump_enabled_p ()) 3577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3578 "use not simple.\n"); 3579 return false; 3580 } 3581 } 3582 3583 /* If op0 is an external or constant defs use a vector type of 3584 the same size as the output vector type. */ 3585 if (!vectype_in) 3586 vectype_in = get_same_sized_vectype (rhs_type, vectype_out); 3587 if (vec_stmt) 3588 gcc_assert (vectype_in); 3589 if (!vectype_in) 3590 { 3591 if (dump_enabled_p ()) 3592 { 3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3594 "no vectype for scalar type "); 3595 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type); 3596 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 3597 } 3598 3599 return false; 3600 } 3601 3602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 3603 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3604 if (nunits_in < nunits_out) 3605 modifier = NARROW; 3606 else if (nunits_out == nunits_in) 3607 modifier = NONE; 3608 else 3609 modifier = WIDEN; 3610 3611 /* Multiple types in SLP are handled by creating the appropriate number of 3612 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3613 case of SLP. */ 3614 if (slp_node || PURE_SLP_STMT (stmt_info)) 3615 ncopies = 1; 3616 else if (modifier == NARROW) 3617 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 3618 else 3619 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 3620 3621 /* Sanity check: make sure that at least one copy of the vectorized stmt 3622 needs to be generated. */ 3623 gcc_assert (ncopies >= 1); 3624 3625 /* Supportable by target? 
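     E.g. (illustrative) a short -> double FLOAT_EXPR that the target
     cannot perform directly may be decomposed in the WIDEN case below
     into a widening short -> int step followed by a supported
     int -> double conversion; the loop over RHS_MODE searches for such
     an intermediate integer mode.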
*/ 3626 switch (modifier) 3627 { 3628 case NONE: 3629 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 3630 return false; 3631 if (supportable_convert_operation (code, vectype_out, vectype_in, 3632 &decl1, &code1)) 3633 break; 3634 /* FALLTHRU */ 3635 unsupported: 3636 if (dump_enabled_p ()) 3637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3638 "conversion not supported by target.\n"); 3639 return false; 3640 3641 case WIDEN: 3642 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in, 3643 &code1, &code2, &multi_step_cvt, 3644 &interm_types)) 3645 { 3646 /* Binary widening operation can only be supported directly by the 3647 architecture. */ 3648 gcc_assert (!(multi_step_cvt && op_type == binary_op)); 3649 break; 3650 } 3651 3652 if (code != FLOAT_EXPR 3653 || (GET_MODE_SIZE (TYPE_MODE (lhs_type)) 3654 <= GET_MODE_SIZE (TYPE_MODE (rhs_type)))) 3655 goto unsupported; 3656 3657 rhs_mode = TYPE_MODE (rhs_type); 3658 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type)); 3659 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type)); 3660 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz; 3661 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode)) 3662 { 3663 cvt_type 3664 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 3665 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 3666 if (cvt_type == NULL_TREE) 3667 goto unsupported; 3668 3669 if (GET_MODE_SIZE (rhs_mode) == fltsz) 3670 { 3671 if (!supportable_convert_operation (code, vectype_out, 3672 cvt_type, &decl1, &codecvt1)) 3673 goto unsupported; 3674 } 3675 else if (!supportable_widening_operation (code, stmt, vectype_out, 3676 cvt_type, &codecvt1, 3677 &codecvt2, &multi_step_cvt, 3678 &interm_types)) 3679 continue; 3680 else 3681 gcc_assert (multi_step_cvt == 0); 3682 3683 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type, 3684 vectype_in, &code1, &code2, 3685 &multi_step_cvt, &interm_types)) 3686 break; 3687 } 3688 3689 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz) 3690 goto unsupported; 3691 3692 if (GET_MODE_SIZE (rhs_mode) == fltsz) 3693 codecvt2 = ERROR_MARK; 3694 else 3695 { 3696 multi_step_cvt++; 3697 interm_types.safe_push (cvt_type); 3698 cvt_type = NULL_TREE; 3699 } 3700 break; 3701 3702 case NARROW: 3703 gcc_assert (op_type == unary_op); 3704 if (supportable_narrowing_operation (code, vectype_out, vectype_in, 3705 &code1, &multi_step_cvt, 3706 &interm_types)) 3707 break; 3708 3709 if (code != FIX_TRUNC_EXPR 3710 || (GET_MODE_SIZE (TYPE_MODE (lhs_type)) 3711 >= GET_MODE_SIZE (TYPE_MODE (rhs_type)))) 3712 goto unsupported; 3713 3714 rhs_mode = TYPE_MODE (rhs_type); 3715 cvt_type 3716 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 3717 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 3718 if (cvt_type == NULL_TREE) 3719 goto unsupported; 3720 if (!supportable_convert_operation (code, cvt_type, vectype_in, 3721 &decl1, &codecvt1)) 3722 goto unsupported; 3723 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, 3724 &code1, &multi_step_cvt, 3725 &interm_types)) 3726 break; 3727 goto unsupported; 3728 3729 default: 3730 gcc_unreachable (); 3731 } 3732 3733 if (!vec_stmt) /* transformation not required. 
*/ 3734 { 3735 if (dump_enabled_p ()) 3736 dump_printf_loc (MSG_NOTE, vect_location, 3737 "=== vectorizable_conversion ===\n"); 3738 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR) 3739 { 3740 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; 3741 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); 3742 } 3743 else if (modifier == NARROW) 3744 { 3745 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; 3746 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); 3747 } 3748 else 3749 { 3750 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; 3751 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); 3752 } 3753 interm_types.release (); 3754 return true; 3755 } 3756 3757 /** Transform. **/ 3758 if (dump_enabled_p ()) 3759 dump_printf_loc (MSG_NOTE, vect_location, 3760 "transform conversion. ncopies = %d.\n", ncopies); 3761 3762 if (op_type == binary_op) 3763 { 3764 if (CONSTANT_CLASS_P (op0)) 3765 op0 = fold_convert (TREE_TYPE (op1), op0); 3766 else if (CONSTANT_CLASS_P (op1)) 3767 op1 = fold_convert (TREE_TYPE (op0), op1); 3768 } 3769 3770 /* In case of multi-step conversion, we first generate conversion operations 3771 to the intermediate types, and then from those types to the final one. 3772 We create vector destinations for the intermediate types (TYPES) received 3773 from supportable_*_operation, and store them in the correct order 3774 for future use in vect_create_vectorized_*_stmts (). */ 3775 vec_dsts.create (multi_step_cvt + 1); 3776 vec_dest = vect_create_destination_var (scalar_dest, 3777 (cvt_type && modifier == WIDEN) 3778 ? cvt_type : vectype_out); 3779 vec_dsts.quick_push (vec_dest); 3780 3781 if (multi_step_cvt) 3782 { 3783 for (i = interm_types.length () - 1; 3784 interm_types.iterate (i, &intermediate_type); i--) 3785 { 3786 vec_dest = vect_create_destination_var (scalar_dest, 3787 intermediate_type); 3788 vec_dsts.quick_push (vec_dest); 3789 } 3790 } 3791 3792 if (cvt_type) 3793 vec_dest = vect_create_destination_var (scalar_dest, 3794 modifier == WIDEN 3795 ? vectype_out : cvt_type); 3796 3797 if (!slp_node) 3798 { 3799 if (modifier == WIDEN) 3800 { 3801 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1); 3802 if (op_type == binary_op) 3803 vec_oprnds1.create (1); 3804 } 3805 else if (modifier == NARROW) 3806 vec_oprnds0.create ( 3807 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1)); 3808 } 3809 else if (code == WIDEN_LSHIFT_EXPR) 3810 vec_oprnds1.create (slp_node->vec_stmts_size); 3811 3812 last_oprnd = op0; 3813 prev_stmt_info = NULL; 3814 switch (modifier) 3815 { 3816 case NONE: 3817 for (j = 0; j < ncopies; j++) 3818 { 3819 if (j == 0) 3820 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, 3821 -1); 3822 else 3823 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); 3824 3825 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 3826 { 3827 /* Arguments are ready, create the new vector stmt. 
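In the NONE case this is a single conversion per operand; e.g. (a sketch, names made up) a FIX_TRUNC_EXPR from V4SF to V4SI emits vect_t.1 = (vector(4) int) vop0, unless the target implements the conversion as a builtin, in which case CODE1 == CALL_EXPR and DECL1 is called instead. 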
*/ 3828 if (code1 == CALL_EXPR) 3829 { 3830 new_stmt = gimple_build_call (decl1, 1, vop0); 3831 new_temp = make_ssa_name (vec_dest, new_stmt); 3832 gimple_call_set_lhs (new_stmt, new_temp); 3833 } 3834 else 3835 { 3836 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); 3837 new_stmt = gimple_build_assign (vec_dest, code1, vop0); 3838 new_temp = make_ssa_name (vec_dest, new_stmt); 3839 gimple_assign_set_lhs (new_stmt, new_temp); 3840 } 3841 3842 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3843 if (slp_node) 3844 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3845 } 3846 3847 if (j == 0) 3848 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3849 else 3850 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3851 prev_stmt_info = vinfo_for_stmt (new_stmt); 3852 } 3853 break; 3854 3855 case WIDEN: 3856 /* In case the vectorization factor (VF) is bigger than the number 3857 of elements that we can fit in a vectype (nunits), we have to 3858 generate more than one vector stmt - i.e - we need to "unroll" 3859 the vector stmt by a factor VF/nunits. */ 3860 for (j = 0; j < ncopies; j++) 3861 { 3862 /* Handle uses. */ 3863 if (j == 0) 3864 { 3865 if (slp_node) 3866 { 3867 if (code == WIDEN_LSHIFT_EXPR) 3868 { 3869 unsigned int k; 3870 3871 vec_oprnd1 = op1; 3872 /* Store vec_oprnd1 for every vector stmt to be created 3873 for SLP_NODE. We check during the analysis that all 3874 the shift arguments are the same. */ 3875 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 3876 vec_oprnds1.quick_push (vec_oprnd1); 3877 3878 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 3879 slp_node, -1); 3880 } 3881 else 3882 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, 3883 &vec_oprnds1, slp_node, -1); 3884 } 3885 else 3886 { 3887 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 3888 vec_oprnds0.quick_push (vec_oprnd0); 3889 if (op_type == binary_op) 3890 { 3891 if (code == WIDEN_LSHIFT_EXPR) 3892 vec_oprnd1 = op1; 3893 else 3894 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, 3895 NULL); 3896 vec_oprnds1.quick_push (vec_oprnd1); 3897 } 3898 } 3899 } 3900 else 3901 { 3902 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 3903 vec_oprnds0.truncate (0); 3904 vec_oprnds0.quick_push (vec_oprnd0); 3905 if (op_type == binary_op) 3906 { 3907 if (code == WIDEN_LSHIFT_EXPR) 3908 vec_oprnd1 = op1; 3909 else 3910 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], 3911 vec_oprnd1); 3912 vec_oprnds1.truncate (0); 3913 vec_oprnds1.quick_push (vec_oprnd1); 3914 } 3915 } 3916 3917 /* Arguments are ready. Create the new vector stmts. 
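Each widening step turns every input vector into two wider half-vectors; e.g. (sketch) a V16QI operand typically becomes two V8HI results via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR, so the number of vectors in VEC_OPRNDS0 doubles with each of the MULTI_STEP_CVT steps. 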
*/ 3918 for (i = multi_step_cvt; i >= 0; i--) 3919 { 3920 tree this_dest = vec_dsts[i]; 3921 enum tree_code c1 = code1, c2 = code2; 3922 if (i == 0 && codecvt2 != ERROR_MARK) 3923 { 3924 c1 = codecvt1; 3925 c2 = codecvt2; 3926 } 3927 vect_create_vectorized_promotion_stmts (&vec_oprnds0, 3928 &vec_oprnds1, 3929 stmt, this_dest, gsi, 3930 c1, c2, decl1, decl2, 3931 op_type); 3932 } 3933 3934 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 3935 { 3936 if (cvt_type) 3937 { 3938 if (codecvt1 == CALL_EXPR) 3939 { 3940 new_stmt = gimple_build_call (decl1, 1, vop0); 3941 new_temp = make_ssa_name (vec_dest, new_stmt); 3942 gimple_call_set_lhs (new_stmt, new_temp); 3943 } 3944 else 3945 { 3946 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 3947 new_temp = make_ssa_name (vec_dest); 3948 new_stmt = gimple_build_assign (new_temp, codecvt1, 3949 vop0); 3950 } 3951 3952 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3953 } 3954 else 3955 new_stmt = SSA_NAME_DEF_STMT (vop0); 3956 3957 if (slp_node) 3958 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3959 else 3960 { 3961 if (!prev_stmt_info) 3962 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 3963 else 3964 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3965 prev_stmt_info = vinfo_for_stmt (new_stmt); 3966 } 3967 } 3968 } 3969 3970 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 3971 break; 3972 3973 case NARROW: 3974 /* In case the vectorization factor (VF) is bigger than the number 3975 of elements that we can fit in a vectype (nunits), we have to 3976 generate more than one vector stmt - i.e - we need to "unroll" 3977 the vector stmt by a factor VF/nunits. */ 3978 for (j = 0; j < ncopies; j++) 3979 { 3980 /* Handle uses. */ 3981 if (slp_node) 3982 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 3983 slp_node, -1); 3984 else 3985 { 3986 vec_oprnds0.truncate (0); 3987 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0, 3988 vect_pow2 (multi_step_cvt) - 1); 3989 } 3990 3991 /* Arguments are ready. Create the new vector stmts. */ 3992 if (cvt_type) 3993 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 3994 { 3995 if (codecvt1 == CALL_EXPR) 3996 { 3997 new_stmt = gimple_build_call (decl1, 1, vop0); 3998 new_temp = make_ssa_name (vec_dest, new_stmt); 3999 gimple_call_set_lhs (new_stmt, new_temp); 4000 } 4001 else 4002 { 4003 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 4004 new_temp = make_ssa_name (vec_dest); 4005 new_stmt = gimple_build_assign (new_temp, codecvt1, 4006 vop0); 4007 } 4008 4009 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4010 vec_oprnds0[i] = new_temp; 4011 } 4012 4013 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt, 4014 stmt, vec_dsts, gsi, 4015 slp_node, code1, 4016 &prev_stmt_info); 4017 } 4018 4019 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 4020 break; 4021 } 4022 4023 vec_oprnds0.release (); 4024 vec_oprnds1.release (); 4025 vec_dsts.release (); 4026 interm_types.release (); 4027 4028 return true; 4029} 4030 4031 4032/* Function vectorizable_assignment. 4033 4034 Check if STMT performs an assignment (copy) that can be vectorized. 4035 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4036 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 4037 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
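Assignments handled here are plain copies, PAREN_EXPRs and conversions (NOP_EXPR / VIEW_CONVERT_EXPR) that change neither the number of elements nor the vector size; e.g. (illustration) _3 = (unsigned int) _2 between same-sized types is vectorized as a VIEW_CONVERT_EXPR of the operand vector. 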
*/ 4038 4039static bool 4040vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi, 4041 gimple *vec_stmt, slp_tree slp_node) 4042{ 4043 tree vec_dest; 4044 tree scalar_dest; 4045 tree op; 4046 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4047 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4048 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4049 tree new_temp; 4050 tree def; 4051 gimple def_stmt; 4052 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 4053 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); 4054 int ncopies; 4055 int i, j; 4056 vec<tree> vec_oprnds = vNULL; 4057 tree vop; 4058 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4059 gimple new_stmt = NULL; 4060 stmt_vec_info prev_stmt_info = NULL; 4061 enum tree_code code; 4062 tree vectype_in; 4063 4064 /* Multiple types in SLP are handled by creating the appropriate number of 4065 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4066 case of SLP. */ 4067 if (slp_node || PURE_SLP_STMT (stmt_info)) 4068 ncopies = 1; 4069 else 4070 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 4071 4072 gcc_assert (ncopies >= 1); 4073 4074 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4075 return false; 4076 4077 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 4078 return false; 4079 4080 /* Is vectorizable assignment? */ 4081 if (!is_gimple_assign (stmt)) 4082 return false; 4083 4084 scalar_dest = gimple_assign_lhs (stmt); 4085 if (TREE_CODE (scalar_dest) != SSA_NAME) 4086 return false; 4087 4088 code = gimple_assign_rhs_code (stmt); 4089 if (gimple_assign_single_p (stmt) 4090 || code == PAREN_EXPR 4091 || CONVERT_EXPR_CODE_P (code)) 4092 op = gimple_assign_rhs1 (stmt); 4093 else 4094 return false; 4095 4096 if (code == VIEW_CONVERT_EXPR) 4097 op = TREE_OPERAND (op, 0); 4098 4099 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo, 4100 &def_stmt, &def, &dt[0], &vectype_in)) 4101 { 4102 if (dump_enabled_p ()) 4103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4104 "use not simple.\n"); 4105 return false; 4106 } 4107 4108 /* We can handle NOP_EXPR conversions that do not change the number 4109 of elements or the vector size. */ 4110 if ((CONVERT_EXPR_CODE_P (code) 4111 || code == VIEW_CONVERT_EXPR) 4112 && (!vectype_in 4113 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits 4114 || (GET_MODE_SIZE (TYPE_MODE (vectype)) 4115 != GET_MODE_SIZE (TYPE_MODE (vectype_in))))) 4116 return false; 4117 4118 /* We do not handle bit-precision changes. */ 4119 if ((CONVERT_EXPR_CODE_P (code) 4120 || code == VIEW_CONVERT_EXPR) 4121 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 4122 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 4123 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) 4124 || ((TYPE_PRECISION (TREE_TYPE (op)) 4125 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op)))))) 4126 /* But a conversion that does not change the bit-pattern is ok. */ 4127 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 4128 > TYPE_PRECISION (TREE_TYPE (op))) 4129 && TYPE_UNSIGNED (TREE_TYPE (op)))) 4130 { 4131 if (dump_enabled_p ()) 4132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4133 "type conversion to/from bit-precision " 4134 "unsupported.\n"); 4135 return false; 4136 } 4137 4138 if (!vec_stmt) /* transformation not required. 
*/ 4139 { 4140 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 4141 if (dump_enabled_p ()) 4142 dump_printf_loc (MSG_NOTE, vect_location, 4143 "=== vectorizable_assignment ===\n"); 4144 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); 4145 return true; 4146 } 4147 4148 /** Transform. **/ 4149 if (dump_enabled_p ()) 4150 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n"); 4151 4152 /* Handle def. */ 4153 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4154 4155 /* Handle use. */ 4156 for (j = 0; j < ncopies; j++) 4157 { 4158 /* Handle uses. */ 4159 if (j == 0) 4160 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); 4161 else 4162 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); 4163 4164 /* Arguments are ready. Create the new vector stmt. */ 4165 FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 4166 { 4167 if (CONVERT_EXPR_CODE_P (code) 4168 || code == VIEW_CONVERT_EXPR) 4169 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); 4170 new_stmt = gimple_build_assign (vec_dest, vop); 4171 new_temp = make_ssa_name (vec_dest, new_stmt); 4172 gimple_assign_set_lhs (new_stmt, new_temp); 4173 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4174 if (slp_node) 4175 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4176 } 4177 4178 if (slp_node) 4179 continue; 4180 4181 if (j == 0) 4182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4183 else 4184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4185 4186 prev_stmt_info = vinfo_for_stmt (new_stmt); 4187 } 4188 4189 vec_oprnds.release (); 4190 return true; 4191} 4192 4193 4194/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE 4195 either as shift by a scalar or by a vector. */ 4196 4197bool 4198vect_supportable_shift (enum tree_code code, tree scalar_type) 4199{ 4200 4201 machine_mode vec_mode; 4202 optab optab; 4203 int icode; 4204 tree vectype; 4205 4206 vectype = get_vectype_for_scalar_type (scalar_type); 4207 if (!vectype) 4208 return false; 4209 4210 optab = optab_for_tree_code (code, vectype, optab_scalar); 4211 if (!optab 4212 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) 4213 { 4214 optab = optab_for_tree_code (code, vectype, optab_vector); 4215 if (!optab 4216 || (optab_handler (optab, TYPE_MODE (vectype)) 4217 == CODE_FOR_nothing)) 4218 return false; 4219 } 4220 4221 vec_mode = TYPE_MODE (vectype); 4222 icode = (int) optab_handler (optab, vec_mode); 4223 if (icode == CODE_FOR_nothing) 4224 return false; 4225 4226 return true; 4227} 4228 4229 4230/* Function vectorizable_shift. 4231 4232 Check if STMT performs a shift operation that can be vectorized. 4233 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4234 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 4235 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
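The shift amount may be a vector (e.g. a[i] = b[i] << c[i]) or a loop-invariant scalar (e.g. a[i] = b[i] << c); the two forms map onto the vector/vector and vector/scalar shift optabs whose availability is checked below. 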
*/ 4236 4237static bool 4238vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, 4239 gimple *vec_stmt, slp_tree slp_node) 4240{ 4241 tree vec_dest; 4242 tree scalar_dest; 4243 tree op0, op1 = NULL; 4244 tree vec_oprnd1 = NULL_TREE; 4245 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4246 tree vectype; 4247 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4248 enum tree_code code; 4249 machine_mode vec_mode; 4250 tree new_temp; 4251 optab optab; 4252 int icode; 4253 machine_mode optab_op2_mode; 4254 tree def; 4255 gimple def_stmt; 4256 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 4257 gimple new_stmt = NULL; 4258 stmt_vec_info prev_stmt_info; 4259 int nunits_in; 4260 int nunits_out; 4261 tree vectype_out; 4262 tree op1_vectype; 4263 int ncopies; 4264 int j, i; 4265 vec<tree> vec_oprnds0 = vNULL; 4266 vec<tree> vec_oprnds1 = vNULL; 4267 tree vop0, vop1; 4268 unsigned int k; 4269 bool scalar_shift_arg = true; 4270 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4271 int vf; 4272 4273 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4274 return false; 4275 4276 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 4277 return false; 4278 4279 /* Is STMT a vectorizable binary/unary operation? */ 4280 if (!is_gimple_assign (stmt)) 4281 return false; 4282 4283 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 4284 return false; 4285 4286 code = gimple_assign_rhs_code (stmt); 4287 4288 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 4289 || code == RROTATE_EXPR)) 4290 return false; 4291 4292 scalar_dest = gimple_assign_lhs (stmt); 4293 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 4294 if (TYPE_PRECISION (TREE_TYPE (scalar_dest)) 4295 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) 4296 { 4297 if (dump_enabled_p ()) 4298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4299 "bit-precision shifts not supported.\n"); 4300 return false; 4301 } 4302 4303 op0 = gimple_assign_rhs1 (stmt); 4304 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo, 4305 &def_stmt, &def, &dt[0], &vectype)) 4306 { 4307 if (dump_enabled_p ()) 4308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4309 "use not simple.\n"); 4310 return false; 4311 } 4312 /* If op0 is an external or constant def use a vector type with 4313 the same size as the output vector type. */ 4314 if (!vectype) 4315 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); 4316 if (vec_stmt) 4317 gcc_assert (vectype); 4318 if (!vectype) 4319 { 4320 if (dump_enabled_p ()) 4321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4322 "no vectype for scalar type\n"); 4323 return false; 4324 } 4325 4326 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 4327 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 4328 if (nunits_out != nunits_in) 4329 return false; 4330 4331 op1 = gimple_assign_rhs2 (stmt); 4332 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt, 4333 &def, &dt[1], &op1_vectype)) 4334 { 4335 if (dump_enabled_p ()) 4336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4337 "use not simple.\n"); 4338 return false; 4339 } 4340 4341 if (loop_vinfo) 4342 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 4343 else 4344 vf = 1; 4345 4346 /* Multiple types in SLP are handled by creating the appropriate number of 4347 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4348 case of SLP. 
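Otherwise NCOPIES follows from the vectorization factor; e.g. (worked example) VF == 8 with four elements per vector gives NCOPIES == 8 / 4 == 2. 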
*/ 4349 if (slp_node || PURE_SLP_STMT (stmt_info)) 4350 ncopies = 1; 4351 else 4352 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 4353 4354 gcc_assert (ncopies >= 1); 4355 4356 /* Determine whether the shift amount is a vector or a scalar. If the 4357 shift/rotate amount is a vector, use the vector/vector shift optabs. */ 4358 4359 if (dt[1] == vect_internal_def && !slp_node) 4360 scalar_shift_arg = false; 4361 else if (dt[1] == vect_constant_def 4362 || dt[1] == vect_external_def 4363 || dt[1] == vect_internal_def) 4364 { 4365 /* In SLP, we need to check whether the shift count is the same for 4366 all statements; in loops, if it is a constant or invariant, it is 4367 always a scalar shift. */ 4368 if (slp_node) 4369 { 4370 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node); 4371 gimple slpstmt; 4372 4373 FOR_EACH_VEC_ELT (stmts, k, slpstmt) 4374 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0)) 4375 scalar_shift_arg = false; 4376 } 4377 } 4378 else 4379 { 4380 if (dump_enabled_p ()) 4381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4382 "operand mode requires invariant argument.\n"); 4383 return false; 4384 } 4385 4386 /* Vector shifted by vector. */ 4387 if (!scalar_shift_arg) 4388 { 4389 optab = optab_for_tree_code (code, vectype, optab_vector); 4390 if (dump_enabled_p ()) 4391 dump_printf_loc (MSG_NOTE, vect_location, 4392 "vector/vector shift/rotate found.\n"); 4393 4394 if (!op1_vectype) 4395 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out); 4396 if (op1_vectype == NULL_TREE 4397 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) 4398 { 4399 if (dump_enabled_p ()) 4400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4401 "unusable type for last operand in" 4402 " vector/vector shift/rotate.\n"); 4403 return false; 4404 } 4405 } 4406 /* See if the machine has a vector shifted by scalar insn, and if not 4407 then see if it has a vector shifted by vector insn. */ 4408 else 4409 { 4410 optab = optab_for_tree_code (code, vectype, optab_scalar); 4411 if (optab 4412 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing) 4413 { 4414 if (dump_enabled_p ()) 4415 dump_printf_loc (MSG_NOTE, vect_location, 4416 "vector/scalar shift/rotate found.\n"); 4417 } 4418 else 4419 { 4420 optab = optab_for_tree_code (code, vectype, optab_vector); 4421 if (optab 4422 && (optab_handler (optab, TYPE_MODE (vectype)) 4423 != CODE_FOR_nothing)) 4424 { 4425 scalar_shift_arg = false; 4426 4427 if (dump_enabled_p ()) 4428 dump_printf_loc (MSG_NOTE, vect_location, 4429 "vector/vector shift/rotate found.\n"); 4430 4431 /* Unlike the other binary operators, shifts/rotates have 4432 the rhs being int, instead of the same type as the lhs, 4433 so make sure the scalar is the right type if we are 4434 dealing with vectors of long long/long/short/char. */ 4435 if (dt[1] == vect_constant_def) 4436 op1 = fold_convert (TREE_TYPE (vectype), op1); 4437 else if (!useless_type_conversion_p (TREE_TYPE (vectype), 4438 TREE_TYPE (op1))) 4439 { 4440 if (slp_node 4441 && TYPE_MODE (TREE_TYPE (vectype)) 4442 != TYPE_MODE (TREE_TYPE (op1))) 4443 { 4444 if (dump_enabled_p ()) 4445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4446 "unusable type for last operand in" 4447 " vector/vector shift/rotate.\n"); 4448 return false; 4449 } 4450 if (vec_stmt && !slp_node) 4451 { 4452 op1 = fold_convert (TREE_TYPE (vectype), op1); 4453 op1 = vect_init_vector (stmt, op1, 4454 TREE_TYPE (vectype), NULL); 4455 } 4456 } 4457 } 4458 } 4459 } 4460 4461 /* Supportable by target? 
*/ 4462 if (!optab) 4463 { 4464 if (dump_enabled_p ()) 4465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4466 "no optab.\n"); 4467 return false; 4468 } 4469 vec_mode = TYPE_MODE (vectype); 4470 icode = (int) optab_handler (optab, vec_mode); 4471 if (icode == CODE_FOR_nothing) 4472 { 4473 if (dump_enabled_p ()) 4474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4475 "op not supported by target.\n"); 4476 /* Check only during analysis. */ 4477 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 4478 || (vf < vect_min_worthwhile_factor (code) 4479 && !vec_stmt)) 4480 return false; 4481 if (dump_enabled_p ()) 4482 dump_printf_loc (MSG_NOTE, vect_location, 4483 "proceeding using word mode.\n"); 4484 } 4485 4486 /* Worthwhile without SIMD support? Check only during analysis. */ 4487 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 4488 && vf < vect_min_worthwhile_factor (code) 4489 && !vec_stmt) 4490 { 4491 if (dump_enabled_p ()) 4492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4493 "not worthwhile without SIMD support.\n"); 4494 return false; 4495 } 4496 4497 if (!vec_stmt) /* transformation not required. */ 4498 { 4499 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; 4500 if (dump_enabled_p ()) 4501 dump_printf_loc (MSG_NOTE, vect_location, 4502 "=== vectorizable_shift ===\n"); 4503 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); 4504 return true; 4505 } 4506 4507 /** Transform. **/ 4508 4509 if (dump_enabled_p ()) 4510 dump_printf_loc (MSG_NOTE, vect_location, 4511 "transform binary/unary operation.\n"); 4512 4513 /* Handle def. */ 4514 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4515 4516 prev_stmt_info = NULL; 4517 for (j = 0; j < ncopies; j++) 4518 { 4519 /* Handle uses. */ 4520 if (j == 0) 4521 { 4522 if (scalar_shift_arg) 4523 { 4524 /* Vector shl and shr insn patterns can be defined with scalar 4525 operand 2 (shift operand). In this case, use constant or loop 4526 invariant op1 directly, without extending it to vector mode 4527 first. */ 4528 optab_op2_mode = insn_data[icode].operand[2].mode; 4529 if (!VECTOR_MODE_P (optab_op2_mode)) 4530 { 4531 if (dump_enabled_p ()) 4532 dump_printf_loc (MSG_NOTE, vect_location, 4533 "operand 1 using scalar mode.\n"); 4534 vec_oprnd1 = op1; 4535 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1); 4536 vec_oprnds1.quick_push (vec_oprnd1); 4537 if (slp_node) 4538 { 4539 /* Store vec_oprnd1 for every vector stmt to be created 4540 for SLP_NODE. We check during the analysis that all 4541 the shift arguments are the same. 4542 TODO: Allow different constants for different vector 4543 stmts generated for an SLP instance. */ 4544 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 4545 vec_oprnds1.quick_push (vec_oprnd1); 4546 } 4547 } 4548 } 4549 4550 /* vec_oprnd1 is available if operand 1 should be of a scalar-type 4551 (a special case for certain kind of vector shifts); otherwise, 4552 operand 1 should be of a vector type (the usual case). */ 4553 if (vec_oprnd1) 4554 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 4555 slp_node, -1); 4556 else 4557 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 4558 slp_node, -1); 4559 } 4560 else 4561 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 4562 4563 /* Arguments are ready. Create the new vector stmt. 
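E.g. (sketch) with VOP0 == vb_1 and an invariant shift amount of 2 this emits vect_a.1 = vb_1 << 2; when the shift amount is a vector, VOP1 is the corresponding element of VEC_OPRNDS1 instead. 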
*/ 4564 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 4565 { 4566 vop1 = vec_oprnds1[i]; 4567 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 4568 new_temp = make_ssa_name (vec_dest, new_stmt); 4569 gimple_assign_set_lhs (new_stmt, new_temp); 4570 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4571 if (slp_node) 4572 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4573 } 4574 4575 if (slp_node) 4576 continue; 4577 4578 if (j == 0) 4579 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4580 else 4581 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4582 prev_stmt_info = vinfo_for_stmt (new_stmt); 4583 } 4584 4585 vec_oprnds0.release (); 4586 vec_oprnds1.release (); 4587 4588 return true; 4589} 4590 4591 4592/* Function vectorizable_operation. 4593 4594 Check if STMT performs a binary, unary or ternary operation that can 4595 be vectorized. 4596 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4597 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 4598 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 4599 4600static bool 4601vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, 4602 gimple *vec_stmt, slp_tree slp_node) 4603{ 4604 tree vec_dest; 4605 tree scalar_dest; 4606 tree op0, op1 = NULL_TREE, op2 = NULL_TREE; 4607 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4608 tree vectype; 4609 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4610 enum tree_code code; 4611 machine_mode vec_mode; 4612 tree new_temp; 4613 int op_type; 4614 optab optab; 4615 int icode; 4616 tree def; 4617 gimple def_stmt; 4618 enum vect_def_type dt[3] 4619 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 4620 gimple new_stmt = NULL; 4621 stmt_vec_info prev_stmt_info; 4622 int nunits_in; 4623 int nunits_out; 4624 tree vectype_out; 4625 int ncopies; 4626 int j, i; 4627 vec<tree> vec_oprnds0 = vNULL; 4628 vec<tree> vec_oprnds1 = vNULL; 4629 vec<tree> vec_oprnds2 = vNULL; 4630 tree vop0, vop1, vop2; 4631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4632 int vf; 4633 4634 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4635 return false; 4636 4637 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 4638 return false; 4639 4640 /* Is STMT a vectorizable binary/unary operation? */ 4641 if (!is_gimple_assign (stmt)) 4642 return false; 4643 4644 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 4645 return false; 4646 4647 code = gimple_assign_rhs_code (stmt); 4648 4649 /* For pointer addition, we should use the normal plus for 4650 the vector addition. */ 4651 if (code == POINTER_PLUS_EXPR) 4652 code = PLUS_EXPR; 4653 4654 /* Support only unary, binary and ternary operations. */ 4655 op_type = TREE_CODE_LENGTH (code); 4656 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) 4657 { 4658 if (dump_enabled_p ()) 4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4660 "num. args = %d (not unary/binary/ternary op).\n", 4661 op_type); 4662 return false; 4663 } 4664 4665 scalar_dest = gimple_assign_lhs (stmt); 4666 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 4667 4668 /* Most operations cannot handle bit-precision types without extra 4669 truncations. */ 4670 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 4671 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) 4672 /* Exceptions are bitwise binary operations. 
*/ 4673 && code != BIT_IOR_EXPR 4674 && code != BIT_XOR_EXPR 4675 && code != BIT_AND_EXPR) 4676 { 4677 if (dump_enabled_p ()) 4678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4679 "bit-precision arithmetic not supported.\n"); 4680 return false; 4681 } 4682 4683 op0 = gimple_assign_rhs1 (stmt); 4684 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo, 4685 &def_stmt, &def, &dt[0], &vectype)) 4686 { 4687 if (dump_enabled_p ()) 4688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4689 "use not simple.\n"); 4690 return false; 4691 } 4692 /* If op0 is an external or constant def use a vector type with 4693 the same size as the output vector type. */ 4694 if (!vectype) 4695 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); 4696 if (vec_stmt) 4697 gcc_assert (vectype); 4698 if (!vectype) 4699 { 4700 if (dump_enabled_p ()) 4701 { 4702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4703 "no vectype for scalar type "); 4704 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 4705 TREE_TYPE (op0)); 4706 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 4707 } 4708 4709 return false; 4710 } 4711 4712 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 4713 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 4714 if (nunits_out != nunits_in) 4715 return false; 4716 4717 if (op_type == binary_op || op_type == ternary_op) 4718 { 4719 op1 = gimple_assign_rhs2 (stmt); 4720 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt, 4721 &def, &dt[1])) 4722 { 4723 if (dump_enabled_p ()) 4724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4725 "use not simple.\n"); 4726 return false; 4727 } 4728 } 4729 if (op_type == ternary_op) 4730 { 4731 op2 = gimple_assign_rhs3 (stmt); 4732 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt, 4733 &def, &dt[2])) 4734 { 4735 if (dump_enabled_p ()) 4736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4737 "use not simple.\n"); 4738 return false; 4739 } 4740 } 4741 4742 if (loop_vinfo) 4743 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 4744 else 4745 vf = 1; 4746 4747 /* Multiple types in SLP are handled by creating the appropriate number of 4748 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4749 case of SLP. */ 4750 if (slp_node || PURE_SLP_STMT (stmt_info)) 4751 ncopies = 1; 4752 else 4753 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 4754 4755 gcc_assert (ncopies >= 1); 4756 4757 /* Shifts are handled in vectorizable_shift (). */ 4758 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 4759 || code == RROTATE_EXPR) 4760 return false; 4761 4762 /* Supportable by target? */ 4763 4764 vec_mode = TYPE_MODE (vectype); 4765 if (code == MULT_HIGHPART_EXPR) 4766 { 4767 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype))) 4768 icode = LAST_INSN_CODE; 4769 else 4770 icode = CODE_FOR_nothing; 4771 } 4772 else 4773 { 4774 optab = optab_for_tree_code (code, vectype, optab_default); 4775 if (!optab) 4776 { 4777 if (dump_enabled_p ()) 4778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4779 "no optab.\n"); 4780 return false; 4781 } 4782 icode = (int) optab_handler (optab, vec_mode); 4783 } 4784 4785 if (icode == CODE_FOR_nothing) 4786 { 4787 if (dump_enabled_p ()) 4788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4789 "op not supported by target.\n"); 4790 /* Check only during analysis. 
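When no vector insn exists the operation may still proceed in word mode, but only if the vector fills exactly one machine word and, during analysis, the vectorization factor makes it worthwhile. 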
*/ 4791 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 4792 || (!vec_stmt && vf < vect_min_worthwhile_factor (code))) 4793 return false; 4794 if (dump_enabled_p ()) 4795 dump_printf_loc (MSG_NOTE, vect_location, 4796 "proceeding using word mode.\n"); 4797 } 4798 4799 /* Worthwhile without SIMD support? Check only during analysis. */ 4800 if (!VECTOR_MODE_P (vec_mode) 4801 && !vec_stmt 4802 && vf < vect_min_worthwhile_factor (code)) 4803 { 4804 if (dump_enabled_p ()) 4805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4806 "not worthwhile without SIMD support.\n"); 4807 return false; 4808 } 4809 4810 if (!vec_stmt) /* transformation not required. */ 4811 { 4812 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 4813 if (dump_enabled_p ()) 4814 dump_printf_loc (MSG_NOTE, vect_location, 4815 "=== vectorizable_operation ===\n"); 4816 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); 4817 return true; 4818 } 4819 4820 /** Transform. **/ 4821 4822 if (dump_enabled_p ()) 4823 dump_printf_loc (MSG_NOTE, vect_location, 4824 "transform binary/unary operation.\n"); 4825 4826 /* Handle def. */ 4827 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4828 4829 /* In case the vectorization factor (VF) is bigger than the number 4830 of elements that we can fit in a vectype (nunits), we have to generate 4831 more than one vector stmt - i.e - we need to "unroll" the 4832 vector stmt by a factor VF/nunits. In doing so, we record a pointer 4833 from one copy of the vector stmt to the next, in the field 4834 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 4835 stages to find the correct vector defs to be used when vectorizing 4836 stmts that use the defs of the current stmt. The example below 4837 illustrates the vectorization process when VF=16 and nunits=4 (i.e., 4838 we need to create 4 vectorized stmts): 4839 4840 before vectorization: 4841 RELATED_STMT VEC_STMT 4842 S1: x = memref - - 4843 S2: z = x + 1 - - 4844 4845 step 1: vectorize stmt S1 (done in vectorizable_load. See more details 4846 there): 4847 RELATED_STMT VEC_STMT 4848 VS1_0: vx0 = memref0 VS1_1 - 4849 VS1_1: vx1 = memref1 VS1_2 - 4850 VS1_2: vx2 = memref2 VS1_3 - 4851 VS1_3: vx3 = memref3 - - 4852 S1: x = load - VS1_0 4853 S2: z = x + 1 - - 4854 4855 step2: vectorize stmt S2 (done here): 4856 To vectorize stmt S2 we first need to find the relevant vector 4857 def for the first operand 'x'. This is, as usual, obtained from 4858 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 4859 that defines 'x' (S1). This way we find the stmt VS1_0, and the 4860 relevant vector def 'vx0'. Having found 'vx0' we can generate 4861 the vector stmt VS2_0, and as usual, record it in the 4862 STMT_VINFO_VEC_STMT of stmt S2. 4863 When creating the second copy (VS2_1), we obtain the relevant vector 4864 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 4865 stmt VS1_0. This way we find the stmt VS1_1 and the relevant 4866 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 4867 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 4868 Similarly when creating stmts VS2_2 and VS2_3. 
This is the resulting 4869 chain of stmts and pointers: 4870 RELATED_STMT VEC_STMT 4871 VS1_0: vx0 = memref0 VS1_1 - 4872 VS1_1: vx1 = memref1 VS1_2 - 4873 VS1_2: vx2 = memref2 VS1_3 - 4874 VS1_3: vx3 = memref3 - - 4875 S1: x = load - VS1_0 4876 VS2_0: vz0 = vx0 + v1 VS2_1 - 4877 VS2_1: vz1 = vx1 + v1 VS2_2 - 4878 VS2_2: vz2 = vx2 + v1 VS2_3 - 4879 VS2_3: vz3 = vx3 + v1 - - 4880 S2: z = x + 1 - VS2_0 */ 4881 4882 prev_stmt_info = NULL; 4883 for (j = 0; j < ncopies; j++) 4884 { 4885 /* Handle uses. */ 4886 if (j == 0) 4887 { 4888 if (op_type == binary_op || op_type == ternary_op) 4889 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 4890 slp_node, -1); 4891 else 4892 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 4893 slp_node, -1); 4894 if (op_type == ternary_op) 4895 { 4896 vec_oprnds2.create (1); 4897 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2, 4898 stmt, 4899 NULL)); 4900 } 4901 } 4902 else 4903 { 4904 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 4905 if (op_type == ternary_op) 4906 { 4907 tree vec_oprnd = vec_oprnds2.pop (); 4908 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2], 4909 vec_oprnd)); 4910 } 4911 } 4912 4913 /* Arguments are ready. Create the new vector stmt. */ 4914 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 4915 { 4916 vop1 = ((op_type == binary_op || op_type == ternary_op) 4917 ? vec_oprnds1[i] : NULL_TREE); 4918 vop2 = ((op_type == ternary_op) 4919 ? vec_oprnds2[i] : NULL_TREE); 4920 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2); 4921 new_temp = make_ssa_name (vec_dest, new_stmt); 4922 gimple_assign_set_lhs (new_stmt, new_temp); 4923 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4924 if (slp_node) 4925 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4926 } 4927 4928 if (slp_node) 4929 continue; 4930 4931 if (j == 0) 4932 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4933 else 4934 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4935 prev_stmt_info = vinfo_for_stmt (new_stmt); 4936 } 4937 4938 vec_oprnds0.release (); 4939 vec_oprnds1.release (); 4940 vec_oprnds2.release (); 4941 4942 return true; 4943} 4944 4945/* A helper function to ensure data reference DR's base alignment 4946 for STMT_INFO. */ 4947 4948static void 4949ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr) 4950{ 4951 if (!dr->aux) 4952 return; 4953 4954 if (DR_VECT_AUX (dr)->base_misaligned) 4955 { 4956 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4957 tree base_decl = DR_VECT_AUX (dr)->base_decl; 4958 4959 if (decl_in_symtab_p (base_decl)) 4960 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype)); 4961 else 4962 { 4963 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype); 4964 DECL_USER_ALIGN (base_decl) = 1; 4965 } 4966 DR_VECT_AUX (dr)->base_misaligned = false; 4967 } 4968} 4969 4970 4971/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements 4972 reversal of the vector elements. If that is impossible to do, 4973 returns NULL. */ 4974 4975static tree 4976perm_mask_for_reverse (tree vectype) 4977{ 4978 int i, nunits; 4979 unsigned char *sel; 4980 4981 nunits = TYPE_VECTOR_SUBPARTS (vectype); 4982 sel = XALLOCAVEC (unsigned char, nunits); 4983 4984 for (i = 0; i < nunits; ++i) 4985 sel[i] = nunits - 1 - i; 4986 4987 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) 4988 return NULL_TREE; 4989 return vect_gen_perm_mask_checked (vectype, sel); 4990} 4991 4992/* Function vectorizable_store. 
4993 4994 Check if STMT defines a non scalar data-ref (array/pointer/structure) that 4995 can be vectorized. 4996 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4997 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 4998 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 4999 5000static bool 5001vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 5002 slp_tree slp_node) 5003{ 5004 tree scalar_dest; 5005 tree data_ref; 5006 tree op; 5007 tree vec_oprnd = NULL_TREE; 5008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5009 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; 5010 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5011 tree elem_type; 5012 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5013 struct loop *loop = NULL; 5014 machine_mode vec_mode; 5015 tree dummy; 5016 enum dr_alignment_support alignment_support_scheme; 5017 tree def; 5018 gimple def_stmt; 5019 enum vect_def_type dt; 5020 stmt_vec_info prev_stmt_info = NULL; 5021 tree dataref_ptr = NULL_TREE; 5022 tree dataref_offset = NULL_TREE; 5023 gimple ptr_incr = NULL; 5024 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 5025 int ncopies; 5026 int j; 5027 gimple next_stmt, first_stmt = NULL; 5028 bool grouped_store = false; 5029 bool store_lanes_p = false; 5030 unsigned int group_size, i; 5031 vec<tree> dr_chain = vNULL; 5032 vec<tree> oprnds = vNULL; 5033 vec<tree> result_chain = vNULL; 5034 bool inv_p; 5035 bool negative = false; 5036 tree offset = NULL_TREE; 5037 vec<tree> vec_oprnds = vNULL; 5038 bool slp = (slp_node != NULL); 5039 unsigned int vec_num; 5040 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5041 tree aggr_type; 5042 5043 if (loop_vinfo) 5044 loop = LOOP_VINFO_LOOP (loop_vinfo); 5045 5046 /* Multiple types in SLP are handled by creating the appropriate number of 5047 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5048 case of SLP. */ 5049 if (slp || PURE_SLP_STMT (stmt_info)) 5050 ncopies = 1; 5051 else 5052 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 5053 5054 gcc_assert (ncopies >= 1); 5055 5056 /* FORNOW. This restriction should be relaxed. */ 5057 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1) 5058 { 5059 if (dump_enabled_p ()) 5060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5061 "multiple types in nested loop.\n"); 5062 return false; 5063 } 5064 5065 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5066 return false; 5067 5068 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 5069 return false; 5070 5071 /* Is vectorizable store? 
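I.e., is STMT an assignment that writes through a supported memory reference? The checks below accept array, bit-field, indirect, component, complex-part and MEM_REF destinations and require the stored value to have a simple def. 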
*/ 5072 5073 if (!is_gimple_assign (stmt)) 5074 return false; 5075 5076 scalar_dest = gimple_assign_lhs (stmt); 5077 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR 5078 && is_pattern_stmt_p (stmt_info)) 5079 scalar_dest = TREE_OPERAND (scalar_dest, 0); 5080 if (TREE_CODE (scalar_dest) != ARRAY_REF 5081 && TREE_CODE (scalar_dest) != BIT_FIELD_REF 5082 && TREE_CODE (scalar_dest) != INDIRECT_REF 5083 && TREE_CODE (scalar_dest) != COMPONENT_REF 5084 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 5085 && TREE_CODE (scalar_dest) != REALPART_EXPR 5086 && TREE_CODE (scalar_dest) != MEM_REF) 5087 return false; 5088 5089 gcc_assert (gimple_assign_single_p (stmt)); 5090 op = gimple_assign_rhs1 (stmt); 5091 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt, 5092 &def, &dt)) 5093 { 5094 if (dump_enabled_p ()) 5095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5096 "use not simple.\n"); 5097 return false; 5098 } 5099 5100 elem_type = TREE_TYPE (vectype); 5101 vec_mode = TYPE_MODE (vectype); 5102 5103 /* FORNOW. In some cases can vectorize even if data-type not supported 5104 (e.g. - array initialization with 0). */ 5105 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) 5106 return false; 5107 5108 if (!STMT_VINFO_DATA_REF (stmt_info)) 5109 return false; 5110 5111 negative = 5112 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) 5113 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), 5114 size_zero_node) < 0; 5115 if (negative && ncopies > 1) 5116 { 5117 if (dump_enabled_p ()) 5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5119 "multiple types with negative step.\n"); 5120 return false; 5121 } 5122 5123 if (negative) 5124 { 5125 gcc_assert (!grouped_store); 5126 alignment_support_scheme = vect_supportable_dr_alignment (dr, false); 5127 if (alignment_support_scheme != dr_aligned 5128 && alignment_support_scheme != dr_unaligned_supported) 5129 { 5130 if (dump_enabled_p ()) 5131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5132 "negative step but alignment required.\n"); 5133 return false; 5134 } 5135 if (dt != vect_constant_def 5136 && dt != vect_external_def 5137 && !perm_mask_for_reverse (vectype)) 5138 { 5139 if (dump_enabled_p ()) 5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5141 "negative step and reversing not supported.\n"); 5142 return false; 5143 } 5144 } 5145 5146 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 5147 { 5148 grouped_store = true; 5149 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 5150 if (!slp && !PURE_SLP_STMT (stmt_info)) 5151 { 5152 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 5153 if (vect_store_lanes_supported (vectype, group_size)) 5154 store_lanes_p = true; 5155 else if (!vect_grouped_store_supported (vectype, group_size)) 5156 return false; 5157 } 5158 5159 if (first_stmt == stmt) 5160 { 5161 /* STMT is the leader of the group. Check the operands of all the 5162 stmts of the group. */ 5163 next_stmt = GROUP_NEXT_ELEMENT (stmt_info); 5164 while (next_stmt) 5165 { 5166 gcc_assert (gimple_assign_single_p (next_stmt)); 5167 op = gimple_assign_rhs1 (next_stmt); 5168 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo, 5169 &def_stmt, &def, &dt)) 5170 { 5171 if (dump_enabled_p ()) 5172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5173 "use not simple.\n"); 5174 return false; 5175 } 5176 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 5177 } 5178 } 5179 } 5180 5181 if (!vec_stmt) /* transformation not required. 
*/ 5182 { 5183 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 5184 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, 5185 NULL, NULL, NULL); 5186 return true; 5187 } 5188 5189 /** Transform. **/ 5190 5191 ensure_base_align (stmt_info, dr); 5192 5193 if (grouped_store) 5194 { 5195 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 5196 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 5197 5198 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++; 5199 5200 /* FORNOW */ 5201 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt)); 5202 5203 /* We vectorize all the stmts of the interleaving group when we 5204 reach the last stmt in the group. */ 5205 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt)) 5206 < GROUP_SIZE (vinfo_for_stmt (first_stmt)) 5207 && !slp) 5208 { 5209 *vec_stmt = NULL; 5210 return true; 5211 } 5212 5213 if (slp) 5214 { 5215 grouped_store = false; 5216 /* VEC_NUM is the number of vect stmts to be created for this 5217 group. */ 5218 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 5219 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 5220 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 5221 op = gimple_assign_rhs1 (first_stmt); 5222 } 5223 else 5224 /* VEC_NUM is the number of vect stmts to be created for this 5225 group. */ 5226 vec_num = group_size; 5227 } 5228 else 5229 { 5230 first_stmt = stmt; 5231 first_dr = dr; 5232 group_size = vec_num = 1; 5233 } 5234 5235 if (dump_enabled_p ()) 5236 dump_printf_loc (MSG_NOTE, vect_location, 5237 "transform store. ncopies = %d\n", ncopies); 5238 5239 dr_chain.create (group_size); 5240 oprnds.create (group_size); 5241 5242 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 5243 gcc_assert (alignment_support_scheme); 5244 /* Targets with store-lane instructions must not require explicit 5245 realignment. */ 5246 gcc_assert (!store_lanes_p 5247 || alignment_support_scheme == dr_aligned 5248 || alignment_support_scheme == dr_unaligned_supported); 5249 5250 if (negative) 5251 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 5252 5253 if (store_lanes_p) 5254 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 5255 else 5256 aggr_type = vectype; 5257 5258 /* In case the vectorization factor (VF) is bigger than the number 5259 of elements that we can fit in a vectype (nunits), we have to generate 5260 more than one vector stmt - i.e - we need to "unroll" the 5261 vector stmt by a factor VF/nunits. For more details see documentation in 5262 vect_get_vec_def_for_copy_stmt. */ 5263 5264 /* In case of interleaving (non-unit grouped access): 5265 5266 S1: &base + 2 = x2 5267 S2: &base = x0 5268 S3: &base + 1 = x1 5269 S4: &base + 3 = x3 5270 5271 We create vectorized stores starting from the base address (the access of 5272 the first stmt in the chain, S2 in the above example) when the last store 5273 stmt of the chain (S4) is reached: 5274 5275 VS1: &base = vx2 5276 VS2: &base + vec_size*1 = vx0 5277 VS3: &base + vec_size*2 = vx1 5278 VS4: &base + vec_size*3 = vx3 5279 5280 Then permutation statements are generated: 5281 5282 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > 5283 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > 5284 ... 
5285 5286 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 5287 (the order of the data-refs in the output of vect_permute_store_chain 5288 corresponds to the order of scalar stmts in the interleaving chain - see 5289 the documentation of vect_permute_store_chain()). 5290 5291 In case of both multiple types and interleaving, above vector stores and 5292 permutation stmts are created for every copy. The result vector stmts are 5293 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 5294 STMT_VINFO_RELATED_STMT for the next copies. 5295 */ 5296 5297 prev_stmt_info = NULL; 5298 for (j = 0; j < ncopies; j++) 5299 { 5300 gimple new_stmt; 5301 5302 if (j == 0) 5303 { 5304 if (slp) 5305 { 5306 /* Get vectorized arguments for SLP_NODE. */ 5307 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, 5308 NULL, slp_node, -1); 5309 5310 vec_oprnd = vec_oprnds[0]; 5311 } 5312 else 5313 { 5314 /* For interleaved stores we collect vectorized defs for all the 5315 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then 5316 used as an input to vect_permute_store_chain(), and OPRNDS as 5317 an input to vect_get_vec_def_for_stmt_copy() for the next copy. 5318 5319 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and 5320 OPRNDS are of size 1. */ 5321 next_stmt = first_stmt; 5322 for (i = 0; i < group_size; i++) 5323 { 5324 /* Since gaps are not supported for interleaved stores, 5325 GROUP_SIZE is the exact number of stmts in the chain. 5326 Therefore, NEXT_STMT can't be NULL_TREE. In case that 5327 there is no interleaving, GROUP_SIZE is 1, and only one 5328 iteration of the loop will be executed. */ 5329 gcc_assert (next_stmt 5330 && gimple_assign_single_p (next_stmt)); 5331 op = gimple_assign_rhs1 (next_stmt); 5332 5333 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, 5334 NULL); 5335 dr_chain.quick_push (vec_oprnd); 5336 oprnds.quick_push (vec_oprnd); 5337 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 5338 } 5339 } 5340 5341 /* We should have caught mismatched types earlier. */ 5342 gcc_assert (useless_type_conversion_p (vectype, 5343 TREE_TYPE (vec_oprnd))); 5344 bool simd_lane_access_p 5345 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info); 5346 if (simd_lane_access_p 5347 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR 5348 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0)) 5349 && integer_zerop (DR_OFFSET (first_dr)) 5350 && integer_zerop (DR_INIT (first_dr)) 5351 && alias_sets_conflict_p (get_alias_set (aggr_type), 5352 get_alias_set (DR_REF (first_dr)))) 5353 { 5354 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr)); 5355 dataref_offset = build_int_cst (reference_alias_ptr_type 5356 (DR_REF (first_dr)), 0); 5357 inv_p = false; 5358 } 5359 else 5360 dataref_ptr 5361 = vect_create_data_ref_ptr (first_stmt, aggr_type, 5362 simd_lane_access_p ? loop : NULL, 5363 offset, &dummy, gsi, &ptr_incr, 5364 simd_lane_access_p, &inv_p); 5365 gcc_assert (bb_vinfo || !inv_p); 5366 } 5367 else 5368 { 5369 /* For interleaved stores we created vectorized defs for all the 5370 defs stored in OPRNDS in the previous iteration (previous copy). 5371 DR_CHAIN is then used as an input to vect_permute_store_chain(), 5372 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 5373 next copy. 5374 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and 5375 OPRNDS are of size 1. 
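E.g. with GROUP_SIZE == 2 and NCOPIES == 2, this branch obtains a fresh vectorized def for each of the two stored values via vect_get_vec_def_for_stmt_copy and refreshes DR_CHAIN and OPRNDS for the current copy. 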
*/ 5376 for (i = 0; i < group_size; i++) 5377 { 5378 op = oprnds[i]; 5379 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt, 5380 &def, &dt); 5381 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op); 5382 dr_chain[i] = vec_oprnd; 5383 oprnds[i] = vec_oprnd; 5384 } 5385 if (dataref_offset) 5386 dataref_offset 5387 = int_const_binop (PLUS_EXPR, dataref_offset, 5388 TYPE_SIZE_UNIT (aggr_type)); 5389 else 5390 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 5391 TYPE_SIZE_UNIT (aggr_type)); 5392 } 5393 5394 if (store_lanes_p) 5395 { 5396 tree vec_array; 5397 5398 /* Combine all the vectors into an array. */ 5399 vec_array = create_vector_array (vectype, vec_num); 5400 for (i = 0; i < vec_num; i++) 5401 { 5402 vec_oprnd = dr_chain[i]; 5403 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); 5404 } 5405 5406 /* Emit: 5407 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ 5408 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); 5409 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); 5410 gimple_call_set_lhs (new_stmt, data_ref); 5411 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5412 } 5413 else 5414 { 5415 new_stmt = NULL; 5416 if (grouped_store) 5417 { 5418 if (j == 0) 5419 result_chain.create (group_size); 5420 /* Permute. */ 5421 vect_permute_store_chain (dr_chain, group_size, stmt, gsi, 5422 &result_chain); 5423 } 5424 5425 next_stmt = first_stmt; 5426 for (i = 0; i < vec_num; i++) 5427 { 5428 unsigned align, misalign; 5429 5430 if (i > 0) 5431 /* Bump the vector pointer. */ 5432 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 5433 stmt, NULL_TREE); 5434 5435 if (slp) 5436 vec_oprnd = vec_oprnds[i]; 5437 else if (grouped_store) 5438 /* For grouped stores vectorized defs are interleaved in 5439 vect_permute_store_chain(). */ 5440 vec_oprnd = result_chain[i]; 5441 5442 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, 5443 dataref_offset 5444 ? dataref_offset 5445 : build_int_cst (reference_alias_ptr_type 5446 (DR_REF (first_dr)), 0)); 5447 align = TYPE_ALIGN_UNIT (vectype); 5448 if (aligned_access_p (first_dr)) 5449 misalign = 0; 5450 else if (DR_MISALIGNMENT (first_dr) == -1) 5451 { 5452 if (DR_VECT_AUX (first_dr)->base_element_aligned) 5453 align = TYPE_ALIGN_UNIT (elem_type); 5454 else 5455 align = get_object_alignment (DR_REF (first_dr)) 5456 / BITS_PER_UNIT; 5457 misalign = 0; 5458 TREE_TYPE (data_ref) 5459 = build_aligned_type (TREE_TYPE (data_ref), 5460 align * BITS_PER_UNIT); 5461 } 5462 else 5463 { 5464 TREE_TYPE (data_ref) 5465 = build_aligned_type (TREE_TYPE (data_ref), 5466 TYPE_ALIGN (elem_type)); 5467 misalign = DR_MISALIGNMENT (first_dr); 5468 } 5469 if (dataref_offset == NULL_TREE) 5470 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 5471 misalign); 5472 5473 if (negative 5474 && dt != vect_constant_def 5475 && dt != vect_external_def) 5476 { 5477 tree perm_mask = perm_mask_for_reverse (vectype); 5478 tree perm_dest 5479 = vect_create_destination_var (gimple_assign_rhs1 (stmt), 5480 vectype); 5481 tree new_temp = make_ssa_name (perm_dest); 5482 5483 /* Generate the permute statement. */ 5484 gimple perm_stmt 5485 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, 5486 vec_oprnd, perm_mask); 5487 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 5488 5489 perm_stmt = SSA_NAME_DEF_STMT (new_temp); 5490 vec_oprnd = new_temp; 5491 } 5492 5493 /* Arguments are ready. Create the new vector stmt. 
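I.e., emit the vector store DATA_REF = VEC_OPRND; for a negative step the operand has already been reversed by the permutation above. 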
 */
5494           new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5495           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5496
5497           if (slp)
5498             continue;
5499
5500           next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5501           if (!next_stmt)
5502             break;
5503         }
5504     }
5505   if (!slp)
5506     {
5507       if (j == 0)
5508         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5509       else
5510         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5511       prev_stmt_info = vinfo_for_stmt (new_stmt);
5512     }
5513     }
5514
5515   dr_chain.release ();
5516   oprnds.release ();
5517   result_chain.release ();
5518   vec_oprnds.release ();
5519
5520   return true;
5521 }
5522
5523 /* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
5524    VECTOR_CST mask.  No checks are made that the target platform supports the
5525    mask, so callers may wish to test can_vec_perm_p separately, or use
5526    vect_gen_perm_mask_checked.  */
5527
5528 tree
5529 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5530 {
5531   tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5532   int i, nunits;
5533
5534   nunits = TYPE_VECTOR_SUBPARTS (vectype);
5535
5536   mask_elt_type = lang_hooks.types.type_for_mode
5537     (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5538   mask_type = get_vectype_for_scalar_type (mask_elt_type);
5539
5540   mask_elts = XALLOCAVEC (tree, nunits);
5541   for (i = nunits - 1; i >= 0; i--)
5542     mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5543   mask_vec = build_vector (mask_type, mask_elts);
5544
5545   return mask_vec;
5546 }
5547
5548 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
5549    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
5550
5551 tree
5552 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5553 {
5554   gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5555   return vect_gen_perm_mask_any (vectype, sel);
5556 }
5557
5558 /* Given vector variables X and Y that were generated for the scalar
5559    STMT, generate instructions to permute the vector elements of X and Y
5560    using the permutation mask MASK_VEC, insert them at *GSI, and return
5561    the permuted vector variable.  */
5562
5563 static tree
5564 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5565                       gimple_stmt_iterator *gsi)
5566 {
5567   tree vectype = TREE_TYPE (x);
5568   tree perm_dest, data_ref;
5569   gimple perm_stmt;
5570
5571   perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5572   data_ref = make_ssa_name (perm_dest);
5573
5574   /* Generate the permute statement.  */
5575   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5576   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5577
5578   return data_ref;
5579 }
5580
5581 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5582    inserting them on the loop's preheader edge.  Returns true if we
5583    were successful in doing so (and thus STMT can then be moved),
5584    otherwise returns false.  */
5585
5586 static bool
5587 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5588 {
5589   ssa_op_iter i;
5590   tree op;
5591   bool any = false;
5592
5593   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5594     {
5595       gimple def_stmt = SSA_NAME_DEF_STMT (op);
5596       if (!gimple_nop_p (def_stmt)
5597           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5598         {
5599           /* Make sure we don't need to recurse.
While we could do
5600              so in simple cases, for more complex use webs we don't have
5601              an easy way to preserve stmt order to fulfil dependencies
5602              within them.  */
5603           tree op2;
5604           ssa_op_iter i2;
5605           if (gimple_code (def_stmt) == GIMPLE_PHI)
5606             return false;
5607           FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5608             {
5609               gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5610               if (!gimple_nop_p (def_stmt2)
5611                   && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5612                 return false;
5613             }
5614           any = true;
5615         }
5616     }
5617
5618   if (!any)
5619     return true;
5620
5621   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5622     {
5623       gimple def_stmt = SSA_NAME_DEF_STMT (op);
5624       if (!gimple_nop_p (def_stmt)
5625           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5626         {
5627           gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5628           gsi_remove (&gsi, false);
5629           gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5630         }
5631     }
5632
5633   return true;
5634 }
5635
5636 /* vectorizable_load.
5637
5638    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5639    can be vectorized.
5640    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5641    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5642    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5643
5644 static bool
5645 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5646                    slp_tree slp_node, slp_instance slp_node_instance)
5647 {
5648   tree scalar_dest;
5649   tree vec_dest = NULL;
5650   tree data_ref = NULL;
5651   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5652   stmt_vec_info prev_stmt_info;
5653   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5654   struct loop *loop = NULL;
5655   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5656   bool nested_in_vect_loop = false;
5657   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5658   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5659   tree elem_type;
5660   tree new_temp;
5661   machine_mode mode;
5662   gimple new_stmt = NULL;
5663   tree dummy;
5664   enum dr_alignment_support alignment_support_scheme;
5665   tree dataref_ptr = NULL_TREE;
5666   tree dataref_offset = NULL_TREE;
5667   gimple ptr_incr = NULL;
5668   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5669   int ncopies;
5670   int i, j, group_size, group_gap;
5671   tree msq = NULL_TREE, lsq;
5672   tree offset = NULL_TREE;
5673   tree byte_offset = NULL_TREE;
5674   tree realignment_token = NULL_TREE;
5675   gphi *phi = NULL;
5676   vec<tree> dr_chain = vNULL;
5677   bool grouped_load = false;
5678   bool load_lanes_p = false;
5679   gimple first_stmt;
5680   bool inv_p;
5681   bool negative = false;
5682   bool compute_in_loop = false;
5683   struct loop *at_loop;
5684   int vec_num;
5685   bool slp = (slp_node != NULL);
5686   bool slp_perm = false;
5687   enum tree_code code;
5688   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5689   int vf;
5690   tree aggr_type;
5691   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5692   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5693   int gather_scale = 1;
5694   enum vect_def_type gather_dt = vect_unknown_def_type;
5695
5696   if (loop_vinfo)
5697     {
5698       loop = LOOP_VINFO_LOOP (loop_vinfo);
5699       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5700       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5701     }
5702   else
5703     vf = 1;
5704
5705   /* Multiple types in SLP are handled by creating the appropriate number of
5706      vectorized stmts for each SLP node.
Hence, NCOPIES is always 1 in
5707      case of SLP.  */
5708   if (slp || PURE_SLP_STMT (stmt_info))
5709     ncopies = 1;
5710   else
5711     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5712
5713   gcc_assert (ncopies >= 1);
5714
5715   /* FORNOW.  This restriction should be relaxed.  */
5716   if (nested_in_vect_loop && ncopies > 1)
5717     {
5718       if (dump_enabled_p ())
5719         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720                          "multiple types in nested loop.\n");
5721       return false;
5722     }
5723
5724   /* Invalidate assumptions made by dependence analysis when vectorization
5725      on the unrolled body effectively re-orders stmts.  */
5726   if (ncopies > 1
5727       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5728       && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5729           > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5730     {
5731       if (dump_enabled_p ())
5732         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733                          "cannot perform implicit CSE when unrolling "
5734                          "with negative dependence distance\n");
5735       return false;
5736     }
5737
5738   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5739     return false;
5740
5741   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5742     return false;
5743
5744   /* Is vectorizable load?  */
5745   if (!is_gimple_assign (stmt))
5746     return false;
5747
5748   scalar_dest = gimple_assign_lhs (stmt);
5749   if (TREE_CODE (scalar_dest) != SSA_NAME)
5750     return false;
5751
5752   code = gimple_assign_rhs_code (stmt);
5753   if (code != ARRAY_REF
5754       && code != BIT_FIELD_REF
5755       && code != INDIRECT_REF
5756       && code != COMPONENT_REF
5757       && code != IMAGPART_EXPR
5758       && code != REALPART_EXPR
5759       && code != MEM_REF
5760       && TREE_CODE_CLASS (code) != tcc_declaration)
5761     return false;
5762
5763   if (!STMT_VINFO_DATA_REF (stmt_info))
5764     return false;
5765
5766   elem_type = TREE_TYPE (vectype);
5767   mode = TYPE_MODE (vectype);
5768
5769   /* FORNOW. In some cases can vectorize even if data-type not supported
5770      (e.g. - data copies).  */
5771   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5772     {
5773       if (dump_enabled_p ())
5774         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775                          "Aligned load, but unsupported type.\n");
5776       return false;
5777     }
5778
5779   /* Check if the load is a part of an interleaving chain.  */
5780   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5781     {
5782       grouped_load = true;
5783       /* FORNOW */
5784       gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5785
5786       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5787
5788       /* If this is single-element interleaving with an element distance
5789          that leaves unused vector loads around, punt - we would at least
5790          create very sub-optimal code in that case (and blow up memory,
5791          see PR65518).  */
5792       if (first_stmt == stmt
5793           && !GROUP_NEXT_ELEMENT (stmt_info)
5794           && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5795         {
5796           if (dump_enabled_p ())
5797             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798                              "single-element interleaving not supported "
5799                              "for non-adjacent vector loads\n");
5800           return false;
5801         }
5802
5803       if (!slp && !PURE_SLP_STMT (stmt_info))
5804         {
5805           group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5806           if (vect_load_lanes_supported (vectype, group_size))
5807             load_lanes_p = true;
5808           else if (!vect_grouped_load_supported (vectype, group_size))
5809             return false;
5810         }
5811
5812       /* Invalidate assumptions made by dependence analysis when vectorization
5813          on the unrolled body effectively re-orders stmts.
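         For illustration only (an invented example, not from this file): in

           for (i = 0; i < n; ++i)
             {
               t = a[i + 3];
               a[i] = t;
             }

         the dependence distance is 3, so a vectorization factor greater
         than 3 would make the unrolled copies access overlapping element
         ranges, re-ordering reads and writes that dependence analysis
         validated only in their original scalar order.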
*/ 5814 if (!PURE_SLP_STMT (stmt_info) 5815 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 5816 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo) 5817 > STMT_VINFO_MIN_NEG_DIST (stmt_info))) 5818 { 5819 if (dump_enabled_p ()) 5820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5821 "cannot perform implicit CSE when performing " 5822 "group loads with negative dependence distance\n"); 5823 return false; 5824 } 5825 5826 /* Similarly when the stmt is a load that is both part of a SLP 5827 instance and a loop vectorized stmt via the same-dr mechanism 5828 we have to give up. */ 5829 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) 5830 && (STMT_SLP_TYPE (stmt_info) 5831 != STMT_SLP_TYPE (vinfo_for_stmt 5832 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info))))) 5833 { 5834 if (dump_enabled_p ()) 5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5836 "conflicting SLP types for CSEd load\n"); 5837 return false; 5838 } 5839 } 5840 5841 5842 if (STMT_VINFO_GATHER_P (stmt_info)) 5843 { 5844 gimple def_stmt; 5845 tree def; 5846 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base, 5847 &gather_off, &gather_scale); 5848 gcc_assert (gather_decl); 5849 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo, 5850 &def_stmt, &def, &gather_dt, 5851 &gather_off_vectype)) 5852 { 5853 if (dump_enabled_p ()) 5854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5855 "gather index use not simple.\n"); 5856 return false; 5857 } 5858 } 5859 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) 5860 ; 5861 else 5862 { 5863 negative = tree_int_cst_compare (nested_in_vect_loop 5864 ? STMT_VINFO_DR_STEP (stmt_info) 5865 : DR_STEP (dr), 5866 size_zero_node) < 0; 5867 if (negative && ncopies > 1) 5868 { 5869 if (dump_enabled_p ()) 5870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5871 "multiple types with negative step.\n"); 5872 return false; 5873 } 5874 5875 if (negative) 5876 { 5877 if (grouped_load) 5878 { 5879 if (dump_enabled_p ()) 5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5881 "negative step for group load not supported" 5882 "\n"); 5883 return false; 5884 } 5885 alignment_support_scheme = vect_supportable_dr_alignment (dr, false); 5886 if (alignment_support_scheme != dr_aligned 5887 && alignment_support_scheme != dr_unaligned_supported) 5888 { 5889 if (dump_enabled_p ()) 5890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5891 "negative step but alignment required.\n"); 5892 return false; 5893 } 5894 if (!perm_mask_for_reverse (vectype)) 5895 { 5896 if (dump_enabled_p ()) 5897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5898 "negative step and reversing not supported." 5899 "\n"); 5900 return false; 5901 } 5902 } 5903 } 5904 5905 if (!vec_stmt) /* transformation not required. */ 5906 { 5907 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 5908 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL); 5909 return true; 5910 } 5911 5912 if (dump_enabled_p ()) 5913 dump_printf_loc (MSG_NOTE, vect_location, 5914 "transform load. ncopies = %d\n", ncopies); 5915 5916 /** Transform. 
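     (A conceptual sketch of the gather path handled first below, ignoring
     the mask and merge operands and using invented names:

        for (k = 0; k < nunits; ++k)
          dest[k] = *(elem_type *)(gather_base + off[k] * gather_scale);

     illustrative only, not the builtin's actual expansion.)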
**/ 5917 5918 ensure_base_align (stmt_info, dr); 5919 5920 if (STMT_VINFO_GATHER_P (stmt_info)) 5921 { 5922 tree vec_oprnd0 = NULL_TREE, op; 5923 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); 5924 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 5925 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE; 5926 edge pe = loop_preheader_edge (loop); 5927 gimple_seq seq; 5928 basic_block new_bb; 5929 enum { NARROW, NONE, WIDEN } modifier; 5930 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype); 5931 5932 if (nunits == gather_off_nunits) 5933 modifier = NONE; 5934 else if (nunits == gather_off_nunits / 2) 5935 { 5936 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); 5937 modifier = WIDEN; 5938 5939 for (i = 0; i < gather_off_nunits; ++i) 5940 sel[i] = i | nunits; 5941 5942 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel); 5943 } 5944 else if (nunits == gather_off_nunits * 2) 5945 { 5946 unsigned char *sel = XALLOCAVEC (unsigned char, nunits); 5947 modifier = NARROW; 5948 5949 for (i = 0; i < nunits; ++i) 5950 sel[i] = i < gather_off_nunits 5951 ? i : i + nunits - gather_off_nunits; 5952 5953 perm_mask = vect_gen_perm_mask_checked (vectype, sel); 5954 ncopies *= 2; 5955 } 5956 else 5957 gcc_unreachable (); 5958 5959 rettype = TREE_TYPE (TREE_TYPE (gather_decl)); 5960 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 5961 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 5962 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 5963 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 5964 scaletype = TREE_VALUE (arglist); 5965 gcc_checking_assert (types_compatible_p (srctype, rettype)); 5966 5967 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5968 5969 ptr = fold_convert (ptrtype, gather_base); 5970 if (!is_gimple_min_invariant (ptr)) 5971 { 5972 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 5973 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 5974 gcc_assert (!new_bb); 5975 } 5976 5977 /* Currently we support only unconditional gather loads, 5978 so mask should be all ones. 
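      E.g. (illustrative) for a V4SF mask type the code below uses
      real_from_target to build the float whose representation is all one
      bits and broadcasts it, so every mask lane carries the bit pattern
      0xffffffff.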
*/ 5979 if (TREE_CODE (masktype) == INTEGER_TYPE) 5980 mask = build_int_cst (masktype, -1); 5981 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) 5982 { 5983 mask = build_int_cst (TREE_TYPE (masktype), -1); 5984 mask = build_vector_from_val (masktype, mask); 5985 mask = vect_init_vector (stmt, mask, masktype, NULL); 5986 } 5987 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) 5988 { 5989 REAL_VALUE_TYPE r; 5990 long tmp[6]; 5991 for (j = 0; j < 6; ++j) 5992 tmp[j] = -1; 5993 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); 5994 mask = build_real (TREE_TYPE (masktype), r); 5995 mask = build_vector_from_val (masktype, mask); 5996 mask = vect_init_vector (stmt, mask, masktype, NULL); 5997 } 5998 else 5999 gcc_unreachable (); 6000 6001 scale = build_int_cst (scaletype, gather_scale); 6002 6003 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE) 6004 merge = build_int_cst (TREE_TYPE (rettype), 0); 6005 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype))) 6006 { 6007 REAL_VALUE_TYPE r; 6008 long tmp[6]; 6009 for (j = 0; j < 6; ++j) 6010 tmp[j] = 0; 6011 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype))); 6012 merge = build_real (TREE_TYPE (rettype), r); 6013 } 6014 else 6015 gcc_unreachable (); 6016 merge = build_vector_from_val (rettype, merge); 6017 merge = vect_init_vector (stmt, merge, rettype, NULL); 6018 6019 prev_stmt_info = NULL; 6020 for (j = 0; j < ncopies; ++j) 6021 { 6022 if (modifier == WIDEN && (j & 1)) 6023 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, 6024 perm_mask, stmt, gsi); 6025 else if (j == 0) 6026 op = vec_oprnd0 6027 = vect_get_vec_def_for_operand (gather_off, stmt, NULL); 6028 else 6029 op = vec_oprnd0 6030 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0); 6031 6032 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 6033 { 6034 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)) 6035 == TYPE_VECTOR_SUBPARTS (idxtype)); 6036 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL); 6037 var = make_ssa_name (var); 6038 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 6039 new_stmt 6040 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 6041 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6042 op = var; 6043 } 6044 6045 new_stmt 6046 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale); 6047 6048 if (!useless_type_conversion_p (vectype, rettype)) 6049 { 6050 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype) 6051 == TYPE_VECTOR_SUBPARTS (rettype)); 6052 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL); 6053 op = make_ssa_name (var, new_stmt); 6054 gimple_call_set_lhs (new_stmt, op); 6055 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6056 var = make_ssa_name (vec_dest); 6057 op = build1 (VIEW_CONVERT_EXPR, vectype, op); 6058 new_stmt 6059 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 6060 } 6061 else 6062 { 6063 var = make_ssa_name (vec_dest, new_stmt); 6064 gimple_call_set_lhs (new_stmt, var); 6065 } 6066 6067 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6068 6069 if (modifier == NARROW) 6070 { 6071 if ((j & 1) == 0) 6072 { 6073 prev_res = var; 6074 continue; 6075 } 6076 var = permute_vec_elements (prev_res, var, 6077 perm_mask, stmt, gsi); 6078 new_stmt = SSA_NAME_DEF_STMT (var); 6079 } 6080 6081 if (prev_stmt_info == NULL) 6082 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 6083 else 6084 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 6085 prev_stmt_info = vinfo_for_stmt (new_stmt); 6086 } 6087 return true; 6088 } 6089 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) 
6090 { 6091 gimple_stmt_iterator incr_gsi; 6092 bool insert_after; 6093 gimple incr; 6094 tree offvar; 6095 tree ivstep; 6096 tree running_off; 6097 vec<constructor_elt, va_gc> *v = NULL; 6098 gimple_seq stmts = NULL; 6099 tree stride_base, stride_step, alias_off; 6100 6101 gcc_assert (!nested_in_vect_loop); 6102 6103 stride_base 6104 = fold_build_pointer_plus 6105 (unshare_expr (DR_BASE_ADDRESS (dr)), 6106 size_binop (PLUS_EXPR, 6107 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))), 6108 convert_to_ptrofftype (DR_INIT (dr)))); 6109 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr))); 6110 6111 /* For a load with loop-invariant (but other than power-of-2) 6112 stride (i.e. not a grouped access) like so: 6113 6114 for (i = 0; i < n; i += stride) 6115 ... = array[i]; 6116 6117 we generate a new induction variable and new accesses to 6118 form a new vector (or vectors, depending on ncopies): 6119 6120 for (j = 0; ; j += VF*stride) 6121 tmp1 = array[j]; 6122 tmp2 = array[j + stride]; 6123 ... 6124 vectemp = {tmp1, tmp2, ...} 6125 */ 6126 6127 ivstep = stride_step; 6128 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, 6129 build_int_cst (TREE_TYPE (ivstep), vf)); 6130 6131 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 6132 6133 create_iv (stride_base, ivstep, NULL, 6134 loop, &incr_gsi, insert_after, 6135 &offvar, NULL); 6136 incr = gsi_stmt (incr_gsi); 6137 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); 6138 6139 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE); 6140 if (stmts) 6141 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 6142 6143 prev_stmt_info = NULL; 6144 running_off = offvar; 6145 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0); 6146 for (j = 0; j < ncopies; j++) 6147 { 6148 tree vec_inv; 6149 6150 vec_alloc (v, nunits); 6151 for (i = 0; i < nunits; i++) 6152 { 6153 tree newref, newoff; 6154 gimple incr; 6155 newref = build2 (MEM_REF, TREE_TYPE (vectype), 6156 running_off, alias_off); 6157 6158 newref = force_gimple_operand_gsi (gsi, newref, true, 6159 NULL_TREE, true, 6160 GSI_SAME_STMT); 6161 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref); 6162 newoff = copy_ssa_name (running_off); 6163 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 6164 running_off, stride_step); 6165 vect_finish_stmt_generation (stmt, incr, gsi); 6166 6167 running_off = newoff; 6168 } 6169 6170 vec_inv = build_constructor (vectype, v); 6171 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); 6172 new_stmt = SSA_NAME_DEF_STMT (new_temp); 6173 6174 if (j == 0) 6175 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 6176 else 6177 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 6178 prev_stmt_info = vinfo_for_stmt (new_stmt); 6179 } 6180 return true; 6181 } 6182 6183 if (grouped_load) 6184 { 6185 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 6186 if (slp 6187 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists () 6188 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0]) 6189 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 6190 6191 /* Check if the chain of loads is already vectorized. */ 6192 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)) 6193 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS. 6194 ??? But we can only do so if there is exactly one 6195 as we have no way to get at the rest. Leave the CSE 6196 opportunity alone. 6197 ??? 
With the group load eventually participating
6198              in multiple different permutations (having multiple
6199              slp nodes which refer to the same group) the CSE
6200              would even produce wrong code.  See PR56270.  */
6201           && !slp)
6202         {
6203           *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6204           return true;
6205         }
6206       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6207       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6208
6209       /* VEC_NUM is the number of vect stmts to be created for this group.  */
6210       if (slp)
6211         {
6212           grouped_load = false;
6213           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6214           if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6215             slp_perm = true;
6216           group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6217         }
6218       else
6219         {
6220           vec_num = group_size;
6221           group_gap = 0;
6222         }
6223     }
6224   else
6225     {
6226       first_stmt = stmt;
6227       first_dr = dr;
6228       group_size = vec_num = 1;
6229       group_gap = 0;
6230     }
6231
6232   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6233   gcc_assert (alignment_support_scheme);
6234   /* Targets with load-lane instructions must not require explicit
6235      realignment.  */
6236   gcc_assert (!load_lanes_p
6237               || alignment_support_scheme == dr_aligned
6238               || alignment_support_scheme == dr_unaligned_supported);
6239
6240   /* In case the vectorization factor (VF) is bigger than the number
6241      of elements that we can fit in a vectype (nunits), we have to generate
6242      more than one vector stmt - i.e - we need to "unroll" the
6243      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6244      from one copy of the vector stmt to the next, in the field
6245      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6246      stages to find the correct vector defs to be used when vectorizing
6247      stmts that use the defs of the current stmt.  The example below
6248      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6249      need to create 4 vectorized stmts):
6250
6251      before vectorization:
6252                                 RELATED_STMT    VEC_STMT
6253      S1:  x = memref            -               -
6254      S2:  z = x + 1             -               -
6255
6256      step 1: vectorize stmt S1:
6257      We first create the vector stmt VS1_0, and, as usual, record a
6258      pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6259      Next, we create the vector stmt VS1_1, and record a pointer to
6260      it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6261      Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
6262      stmts and pointers:
6263                                 RELATED_STMT    VEC_STMT
6264      VS1_0:  vx0 = memref0      VS1_1           -
6265      VS1_1:  vx1 = memref1      VS1_2           -
6266      VS1_2:  vx2 = memref2      VS1_3           -
6267      VS1_3:  vx3 = memref3      -               -
6268      S1:     x = load           -               VS1_0
6269      S2:     z = x + 1          -               -
6270
6271      See the documentation of vect_get_vec_def_for_stmt_copy for how the
6272      information recorded in the RELATED_STMT field is used to vectorize
6273      stmt S2.  */
6274
6275   /* In case of interleaving (non-unit grouped access):
6276
6277      S1:  x2 = &base + 2
6278      S2:  x0 = &base
6279      S3:  x1 = &base + 1
6280      S4:  x3 = &base + 3
6281
6282      Vectorized loads are created in the order of memory accesses
6283      starting from the access of the first stmt of the chain:
6284
6285      VS1: vx0 = &base
6286      VS2: vx1 = &base + vec_size*1
6287      VS3: vx2 = &base + vec_size*2
6288      VS4: vx3 = &base + vec_size*3
6289
6290      Then permutation statements are generated:
6291
6292      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6293      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6294        ...
6295 6296 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 6297 (the order of the data-refs in the output of vect_permute_load_chain 6298 corresponds to the order of scalar stmts in the interleaving chain - see 6299 the documentation of vect_permute_load_chain()). 6300 The generation of permutation stmts and recording them in 6301 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). 6302 6303 In case of both multiple types and interleaving, the vector loads and 6304 permutation stmts above are created for every copy. The result vector 6305 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the 6306 corresponding STMT_VINFO_RELATED_STMT for the next copies. */ 6307 6308 /* If the data reference is aligned (dr_aligned) or potentially unaligned 6309 on a target that supports unaligned accesses (dr_unaligned_supported) 6310 we generate the following code: 6311 p = initial_addr; 6312 indx = 0; 6313 loop { 6314 p = p + indx * vectype_size; 6315 vec_dest = *(p); 6316 indx = indx + 1; 6317 } 6318 6319 Otherwise, the data reference is potentially unaligned on a target that 6320 does not support unaligned accesses (dr_explicit_realign_optimized) - 6321 then generate the following code, in which the data in each iteration is 6322 obtained by two vector loads, one from the previous iteration, and one 6323 from the current iteration: 6324 p1 = initial_addr; 6325 msq_init = *(floor(p1)) 6326 p2 = initial_addr + VS - 1; 6327 realignment_token = call target_builtin; 6328 indx = 0; 6329 loop { 6330 p2 = p2 + indx * vectype_size 6331 lsq = *(floor(p2)) 6332 vec_dest = realign_load (msq, lsq, realignment_token) 6333 indx = indx + 1; 6334 msq = lsq; 6335 } */ 6336 6337 /* If the misalignment remains the same throughout the execution of the 6338 loop, we can create the init_addr and permutation mask at the loop 6339 preheader. Otherwise, it needs to be created inside the loop. 6340 This can only occur when vectorizing memory accesses in the inner-loop 6341 nested within an outer-loop that is being vectorized. */ 6342 6343 if (nested_in_vect_loop 6344 && (TREE_INT_CST_LOW (DR_STEP (dr)) 6345 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)) 6346 { 6347 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 6348 compute_in_loop = true; 6349 } 6350 6351 if ((alignment_support_scheme == dr_explicit_realign_optimized 6352 || alignment_support_scheme == dr_explicit_realign) 6353 && !compute_in_loop) 6354 { 6355 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token, 6356 alignment_support_scheme, NULL_TREE, 6357 &at_loop); 6358 if (alignment_support_scheme == dr_explicit_realign_optimized) 6359 { 6360 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq)); 6361 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), 6362 size_one_node); 6363 } 6364 } 6365 else 6366 at_loop = loop; 6367 6368 if (negative) 6369 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 6370 6371 if (load_lanes_p) 6372 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 6373 else 6374 aggr_type = vectype; 6375 6376 prev_stmt_info = NULL; 6377 for (j = 0; j < ncopies; j++) 6378 { 6379 /* 1. Create the vector or array pointer update chain. 
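         Sketch of the chain (invented SSA names, illustrative only):

           j == 0:  vectp.0 = vect_create_data_ref_ptr (...);
           j == 1:  vectp.1 = vectp.0 + TYPE_SIZE_UNIT (aggr_type);
           j == 2:  vectp.2 = vectp.1 + TYPE_SIZE_UNIT (aggr_type);
           ...

         or, for the simd-lane / invariant-base case, the constant
         DATAREF_OFFSET is bumped instead of the pointer.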
*/ 6380 if (j == 0) 6381 { 6382 bool simd_lane_access_p 6383 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info); 6384 if (simd_lane_access_p 6385 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR 6386 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0)) 6387 && integer_zerop (DR_OFFSET (first_dr)) 6388 && integer_zerop (DR_INIT (first_dr)) 6389 && alias_sets_conflict_p (get_alias_set (aggr_type), 6390 get_alias_set (DR_REF (first_dr))) 6391 && (alignment_support_scheme == dr_aligned 6392 || alignment_support_scheme == dr_unaligned_supported)) 6393 { 6394 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr)); 6395 dataref_offset = build_int_cst (reference_alias_ptr_type 6396 (DR_REF (first_dr)), 0); 6397 inv_p = false; 6398 } 6399 else 6400 dataref_ptr 6401 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop, 6402 offset, &dummy, gsi, &ptr_incr, 6403 simd_lane_access_p, &inv_p, 6404 byte_offset); 6405 } 6406 else if (dataref_offset) 6407 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, 6408 TYPE_SIZE_UNIT (aggr_type)); 6409 else 6410 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 6411 TYPE_SIZE_UNIT (aggr_type)); 6412 6413 if (grouped_load || slp_perm) 6414 dr_chain.create (vec_num); 6415 6416 if (load_lanes_p) 6417 { 6418 tree vec_array; 6419 6420 vec_array = create_vector_array (vectype, vec_num); 6421 6422 /* Emit: 6423 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ 6424 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); 6425 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); 6426 gimple_call_set_lhs (new_stmt, vec_array); 6427 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6428 6429 /* Extract each vector into an SSA_NAME. */ 6430 for (i = 0; i < vec_num; i++) 6431 { 6432 new_temp = read_vector_array (stmt, gsi, scalar_dest, 6433 vec_array, i); 6434 dr_chain.quick_push (new_temp); 6435 } 6436 6437 /* Record the mapping between SSA_NAMEs and statements. */ 6438 vect_record_grouped_load_vectors (stmt, dr_chain); 6439 } 6440 else 6441 { 6442 for (i = 0; i < vec_num; i++) 6443 { 6444 if (i > 0) 6445 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 6446 stmt, NULL_TREE); 6447 6448 /* 2. Create the vector-load in the loop. */ 6449 switch (alignment_support_scheme) 6450 { 6451 case dr_aligned: 6452 case dr_unaligned_supported: 6453 { 6454 unsigned int align, misalign; 6455 6456 data_ref 6457 = build2 (MEM_REF, vectype, dataref_ptr, 6458 dataref_offset 6459 ? 
dataref_offset 6460 : build_int_cst (reference_alias_ptr_type 6461 (DR_REF (first_dr)), 0)); 6462 align = TYPE_ALIGN_UNIT (vectype); 6463 if (alignment_support_scheme == dr_aligned) 6464 { 6465 gcc_assert (aligned_access_p (first_dr)); 6466 misalign = 0; 6467 } 6468 else if (DR_MISALIGNMENT (first_dr) == -1) 6469 { 6470 if (DR_VECT_AUX (first_dr)->base_element_aligned) 6471 align = TYPE_ALIGN_UNIT (elem_type); 6472 else 6473 align = (get_object_alignment (DR_REF (first_dr)) 6474 / BITS_PER_UNIT); 6475 misalign = 0; 6476 TREE_TYPE (data_ref) 6477 = build_aligned_type (TREE_TYPE (data_ref), 6478 align * BITS_PER_UNIT); 6479 } 6480 else 6481 { 6482 TREE_TYPE (data_ref) 6483 = build_aligned_type (TREE_TYPE (data_ref), 6484 TYPE_ALIGN (elem_type)); 6485 misalign = DR_MISALIGNMENT (first_dr); 6486 } 6487 if (dataref_offset == NULL_TREE) 6488 set_ptr_info_alignment (get_ptr_info (dataref_ptr), 6489 align, misalign); 6490 break; 6491 } 6492 case dr_explicit_realign: 6493 { 6494 tree ptr, bump; 6495 6496 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); 6497 6498 if (compute_in_loop) 6499 msq = vect_setup_realignment (first_stmt, gsi, 6500 &realignment_token, 6501 dr_explicit_realign, 6502 dataref_ptr, NULL); 6503 6504 ptr = copy_ssa_name (dataref_ptr); 6505 new_stmt = gimple_build_assign 6506 (ptr, BIT_AND_EXPR, dataref_ptr, 6507 build_int_cst 6508 (TREE_TYPE (dataref_ptr), 6509 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 6510 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6511 data_ref 6512 = build2 (MEM_REF, vectype, ptr, 6513 build_int_cst (reference_alias_ptr_type 6514 (DR_REF (first_dr)), 0)); 6515 vec_dest = vect_create_destination_var (scalar_dest, 6516 vectype); 6517 new_stmt = gimple_build_assign (vec_dest, data_ref); 6518 new_temp = make_ssa_name (vec_dest, new_stmt); 6519 gimple_assign_set_lhs (new_stmt, new_temp); 6520 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 6521 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 6522 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6523 msq = new_temp; 6524 6525 bump = size_binop (MULT_EXPR, vs, 6526 TYPE_SIZE_UNIT (elem_type)); 6527 bump = size_binop (MINUS_EXPR, bump, size_one_node); 6528 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); 6529 new_stmt = gimple_build_assign 6530 (NULL_TREE, BIT_AND_EXPR, ptr, 6531 build_int_cst 6532 (TREE_TYPE (ptr), 6533 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 6534 ptr = copy_ssa_name (dataref_ptr, new_stmt); 6535 gimple_assign_set_lhs (new_stmt, ptr); 6536 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6537 data_ref 6538 = build2 (MEM_REF, vectype, ptr, 6539 build_int_cst (reference_alias_ptr_type 6540 (DR_REF (first_dr)), 0)); 6541 break; 6542 } 6543 case dr_explicit_realign_optimized: 6544 new_temp = copy_ssa_name (dataref_ptr); 6545 new_stmt = gimple_build_assign 6546 (new_temp, BIT_AND_EXPR, dataref_ptr, 6547 build_int_cst 6548 (TREE_TYPE (dataref_ptr), 6549 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 6550 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6551 data_ref 6552 = build2 (MEM_REF, vectype, new_temp, 6553 build_int_cst (reference_alias_ptr_type 6554 (DR_REF (first_dr)), 0)); 6555 break; 6556 default: 6557 gcc_unreachable (); 6558 } 6559 vec_dest = vect_create_destination_var (scalar_dest, vectype); 6560 new_stmt = gimple_build_assign (vec_dest, data_ref); 6561 new_temp = make_ssa_name (vec_dest, new_stmt); 6562 gimple_assign_set_lhs (new_stmt, new_temp); 6563 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6564 6565 /* 3. 
Handle explicit realignment if necessary/supported. 6566 Create in loop: 6567 vec_dest = realign_load (msq, lsq, realignment_token) */ 6568 if (alignment_support_scheme == dr_explicit_realign_optimized 6569 || alignment_support_scheme == dr_explicit_realign) 6570 { 6571 lsq = gimple_assign_lhs (new_stmt); 6572 if (!realignment_token) 6573 realignment_token = dataref_ptr; 6574 vec_dest = vect_create_destination_var (scalar_dest, vectype); 6575 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, 6576 msq, lsq, realignment_token); 6577 new_temp = make_ssa_name (vec_dest, new_stmt); 6578 gimple_assign_set_lhs (new_stmt, new_temp); 6579 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6580 6581 if (alignment_support_scheme == dr_explicit_realign_optimized) 6582 { 6583 gcc_assert (phi); 6584 if (i == vec_num - 1 && j == ncopies - 1) 6585 add_phi_arg (phi, lsq, 6586 loop_latch_edge (containing_loop), 6587 UNKNOWN_LOCATION); 6588 msq = lsq; 6589 } 6590 } 6591 6592 /* 4. Handle invariant-load. */ 6593 if (inv_p && !bb_vinfo) 6594 { 6595 gcc_assert (!grouped_load); 6596 /* If we have versioned for aliasing or the loop doesn't 6597 have any data dependencies that would preclude this, 6598 then we are sure this is a loop invariant load and 6599 thus we can insert it on the preheader edge. */ 6600 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) 6601 && !nested_in_vect_loop 6602 && hoist_defs_of_uses (stmt, loop)) 6603 { 6604 if (dump_enabled_p ()) 6605 { 6606 dump_printf_loc (MSG_NOTE, vect_location, 6607 "hoisting out of the vectorized " 6608 "loop: "); 6609 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 6610 } 6611 tree tem = copy_ssa_name (scalar_dest); 6612 gsi_insert_on_edge_immediate 6613 (loop_preheader_edge (loop), 6614 gimple_build_assign (tem, 6615 unshare_expr 6616 (gimple_assign_rhs1 (stmt)))); 6617 new_temp = vect_init_vector (stmt, tem, vectype, NULL); 6618 } 6619 else 6620 { 6621 gimple_stmt_iterator gsi2 = *gsi; 6622 gsi_next (&gsi2); 6623 new_temp = vect_init_vector (stmt, scalar_dest, 6624 vectype, &gsi2); 6625 } 6626 new_stmt = SSA_NAME_DEF_STMT (new_temp); 6627 set_vinfo_for_stmt (new_stmt, 6628 new_stmt_vec_info (new_stmt, loop_vinfo, 6629 bb_vinfo)); 6630 } 6631 6632 if (negative) 6633 { 6634 tree perm_mask = perm_mask_for_reverse (vectype); 6635 new_temp = permute_vec_elements (new_temp, new_temp, 6636 perm_mask, stmt, gsi); 6637 new_stmt = SSA_NAME_DEF_STMT (new_temp); 6638 } 6639 6640 /* Collect vector loads and later create their permutation in 6641 vect_transform_grouped_load (). */ 6642 if (grouped_load || slp_perm) 6643 dr_chain.quick_push (new_temp); 6644 6645 /* Store vector loads in the corresponding SLP_NODE. */ 6646 if (slp && !slp_perm) 6647 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 6648 } 6649 /* Bump the vector pointer to account for a gap. 
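             E.g. (illustrative) if an SLP group uses a[3*i] and a[3*i+1]
             but not a[3*i+2], GROUP_GAP is 1, and the pointer must be
             advanced by one element here so that the next iteration
             starts at a[3*(i+1)].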
 */
6650           if (slp && group_gap != 0)
6651             {
6652               tree bump = size_binop (MULT_EXPR,
6653                                       TYPE_SIZE_UNIT (elem_type),
6654                                       size_int (group_gap));
6655               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6656                                              stmt, bump);
6657             }
6658         }
6659
6660       if (slp && !slp_perm)
6661         continue;
6662
6663       if (slp_perm)
6664         {
6665           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6666                                              slp_node_instance, false))
6667             {
6668               dr_chain.release ();
6669               return false;
6670             }
6671         }
6672       else
6673         {
6674           if (grouped_load)
6675             {
6676               if (!load_lanes_p)
6677                 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6678               *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6679             }
6680           else
6681             {
6682               if (j == 0)
6683                 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6684               else
6685                 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6686               prev_stmt_info = vinfo_for_stmt (new_stmt);
6687             }
6688         }
6689       dr_chain.release ();
6690     }
6691
6692   return true;
6693 }
6694
6695 /* Function vect_is_simple_cond.
6696
6697    Input:
6698    LOOP - the loop that is being vectorized.
6699    COND - Condition that is checked for simple use.
6700
6701    Output:
6702    *COMP_VECTYPE - the vector type for the comparison.
6703
6704    Returns whether a COND can be vectorized.  Checks whether
6705    condition operands are supportable using vect_is_simple_use.  */
6706
6707 static bool
6708 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6709                      bb_vec_info bb_vinfo, tree *comp_vectype)
6710 {
6711   tree lhs, rhs;
6712   tree def;
6713   enum vect_def_type dt;
6714   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6715
6716   if (!COMPARISON_CLASS_P (cond))
6717     return false;
6718
6719   lhs = TREE_OPERAND (cond, 0);
6720   rhs = TREE_OPERAND (cond, 1);
6721
6722   if (TREE_CODE (lhs) == SSA_NAME)
6723     {
6724       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6725       if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6726                                  &lhs_def_stmt, &def, &dt, &vectype1))
6727         return false;
6728     }
6729   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6730            && TREE_CODE (lhs) != FIXED_CST)
6731     return false;
6732
6733   if (TREE_CODE (rhs) == SSA_NAME)
6734     {
6735       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6736       if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6737                                  &rhs_def_stmt, &def, &dt, &vectype2))
6738         return false;
6739     }
6740   else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6741            && TREE_CODE (rhs) != FIXED_CST)
6742     return false;
6743
6744   *comp_vectype = vectype1 ? vectype1 : vectype2;
6745   return true;
6746 }
6747
6748 /* vectorizable_condition.
6749
6750    Check if STMT is a conditional modify expression that can be vectorized.
6751    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6752    stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6753    at GSI.
6754
6755    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6756    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6757    the else clause if it is 2).
6758
6759    Return FALSE if not a vectorizable STMT, TRUE otherwise.
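   For example (an illustrative sketch with invented names, not an actual
   dump), the scalar stmt

     x_5 = a_1 < b_2 ? c_3 : d_4;

   becomes something like

     vx_6 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;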
 */
6760
6761 bool
6762 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6763                         gimple *vec_stmt, tree reduc_def, int reduc_index,
6764                         slp_tree slp_node)
6765 {
6766   tree scalar_dest = NULL_TREE;
6767   tree vec_dest = NULL_TREE;
6768   tree cond_expr, then_clause, else_clause;
6769   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6770   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6771   tree comp_vectype = NULL_TREE;
6772   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6773   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6774   tree vec_compare, vec_cond_expr;
6775   tree new_temp;
6776   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6777   tree def;
6778   enum vect_def_type dt, dts[4];
6779   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6780   int ncopies;
6781   enum tree_code code;
6782   stmt_vec_info prev_stmt_info = NULL;
6783   int i, j;
6784   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6785   vec<tree> vec_oprnds0 = vNULL;
6786   vec<tree> vec_oprnds1 = vNULL;
6787   vec<tree> vec_oprnds2 = vNULL;
6788   vec<tree> vec_oprnds3 = vNULL;
6789   tree vec_cmp_type;
6790
6791   if (slp_node || PURE_SLP_STMT (stmt_info))
6792     ncopies = 1;
6793   else
6794     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6795
6796   gcc_assert (ncopies >= 1);
6797   if (reduc_index && ncopies > 1)
6798     return false; /* FORNOW */
6799
6800   if (reduc_index && STMT_SLP_TYPE (stmt_info))
6801     return false;
6802
6803   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6804     return false;
6805
6806   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6807       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6808            && reduc_def))
6809     return false;
6810
6811   /* FORNOW: not yet supported.  */
6812   if (STMT_VINFO_LIVE_P (stmt_info))
6813     {
6814       if (dump_enabled_p ())
6815         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6816                          "value used after loop.\n");
6817       return false;
6818     }
6819
6820   /* Is vectorizable conditional operation?  */
6821   if (!is_gimple_assign (stmt))
6822     return false;
6823
6824   code = gimple_assign_rhs_code (stmt);
6825
6826   if (code != COND_EXPR)
6827     return false;
6828
6829   cond_expr = gimple_assign_rhs1 (stmt);
6830   then_clause = gimple_assign_rhs2 (stmt);
6831   else_clause = gimple_assign_rhs3 (stmt);
6832
6833   if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6834                             &comp_vectype)
6835       || !comp_vectype)
6836     return false;
6837
6838   if (TREE_CODE (then_clause) == SSA_NAME)
6839     {
6840       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6841       if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6842                                &then_def_stmt, &def, &dt))
6843         return false;
6844     }
6845   else if (TREE_CODE (then_clause) != INTEGER_CST
6846            && TREE_CODE (then_clause) != REAL_CST
6847            && TREE_CODE (then_clause) != FIXED_CST)
6848     return false;
6849
6850   if (TREE_CODE (else_clause) == SSA_NAME)
6851     {
6852       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6853       if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6854                                &else_def_stmt, &def, &dt))
6855         return false;
6856     }
6857   else if (TREE_CODE (else_clause) != INTEGER_CST
6858            && TREE_CODE (else_clause) != REAL_CST
6859            && TREE_CODE (else_clause) != FIXED_CST)
6860     return false;
6861
6862   unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6863   /* The result of a vector comparison should be of signed integer type.
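      (A true lane then reads as all-ones, i.e. -1, and a false lane as 0;
      e.g. comparing two V4SI vectors lane-wise yields, illustratively,
      { -1, 0, 0, -1 } rather than { 1, 0, 0, 1 }.)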
*/ 6864 tree cmp_type = build_nonstandard_integer_type (prec, 0); 6865 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype); 6866 if (vec_cmp_type == NULL_TREE) 6867 return false; 6868 6869 if (!vec_stmt) 6870 { 6871 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 6872 return expand_vec_cond_expr_p (vectype, comp_vectype); 6873 } 6874 6875 /* Transform. */ 6876 6877 if (!slp_node) 6878 { 6879 vec_oprnds0.create (1); 6880 vec_oprnds1.create (1); 6881 vec_oprnds2.create (1); 6882 vec_oprnds3.create (1); 6883 } 6884 6885 /* Handle def. */ 6886 scalar_dest = gimple_assign_lhs (stmt); 6887 vec_dest = vect_create_destination_var (scalar_dest, vectype); 6888 6889 /* Handle cond expr. */ 6890 for (j = 0; j < ncopies; j++) 6891 { 6892 gassign *new_stmt = NULL; 6893 if (j == 0) 6894 { 6895 if (slp_node) 6896 { 6897 auto_vec<tree, 4> ops; 6898 auto_vec<vec<tree>, 4> vec_defs; 6899 6900 ops.safe_push (TREE_OPERAND (cond_expr, 0)); 6901 ops.safe_push (TREE_OPERAND (cond_expr, 1)); 6902 ops.safe_push (then_clause); 6903 ops.safe_push (else_clause); 6904 vect_get_slp_defs (ops, slp_node, &vec_defs, -1); 6905 vec_oprnds3 = vec_defs.pop (); 6906 vec_oprnds2 = vec_defs.pop (); 6907 vec_oprnds1 = vec_defs.pop (); 6908 vec_oprnds0 = vec_defs.pop (); 6909 6910 ops.release (); 6911 vec_defs.release (); 6912 } 6913 else 6914 { 6915 gimple gtemp; 6916 vec_cond_lhs = 6917 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), 6918 stmt, NULL); 6919 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt, 6920 loop_vinfo, NULL, >emp, &def, &dts[0]); 6921 6922 vec_cond_rhs = 6923 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), 6924 stmt, NULL); 6925 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt, 6926 loop_vinfo, NULL, >emp, &def, &dts[1]); 6927 if (reduc_index == 1) 6928 vec_then_clause = reduc_def; 6929 else 6930 { 6931 vec_then_clause = vect_get_vec_def_for_operand (then_clause, 6932 stmt, NULL); 6933 vect_is_simple_use (then_clause, stmt, loop_vinfo, 6934 NULL, >emp, &def, &dts[2]); 6935 } 6936 if (reduc_index == 2) 6937 vec_else_clause = reduc_def; 6938 else 6939 { 6940 vec_else_clause = vect_get_vec_def_for_operand (else_clause, 6941 stmt, NULL); 6942 vect_is_simple_use (else_clause, stmt, loop_vinfo, 6943 NULL, >emp, &def, &dts[3]); 6944 } 6945 } 6946 } 6947 else 6948 { 6949 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], 6950 vec_oprnds0.pop ()); 6951 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], 6952 vec_oprnds1.pop ()); 6953 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], 6954 vec_oprnds2.pop ()); 6955 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], 6956 vec_oprnds3.pop ()); 6957 } 6958 6959 if (!slp_node) 6960 { 6961 vec_oprnds0.quick_push (vec_cond_lhs); 6962 vec_oprnds1.quick_push (vec_cond_rhs); 6963 vec_oprnds2.quick_push (vec_then_clause); 6964 vec_oprnds3.quick_push (vec_else_clause); 6965 } 6966 6967 /* Arguments are ready. Create the new vector stmt. 
 */
6968       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6969         {
6970           vec_cond_rhs = vec_oprnds1[i];
6971           vec_then_clause = vec_oprnds2[i];
6972           vec_else_clause = vec_oprnds3[i];
6973
6974           vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6975                                 vec_cond_lhs, vec_cond_rhs);
6976           vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6977                  vec_compare, vec_then_clause, vec_else_clause);
6978
6979           new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6980           new_temp = make_ssa_name (vec_dest, new_stmt);
6981           gimple_assign_set_lhs (new_stmt, new_temp);
6982           vect_finish_stmt_generation (stmt, new_stmt, gsi);
6983           if (slp_node)
6984             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6985         }
6986
6987       if (slp_node)
6988         continue;
6989
6990       if (j == 0)
6991         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6992       else
6993         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6994
6995       prev_stmt_info = vinfo_for_stmt (new_stmt);
6996     }
6997
6998   vec_oprnds0.release ();
6999   vec_oprnds1.release ();
7000   vec_oprnds2.release ();
7001   vec_oprnds3.release ();
7002
7003   return true;
7004 }
7005
7006
7007 /* Make sure the statement is vectorizable.  */
7008
7009 bool
7010 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7011 {
7012   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7013   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7014   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7015   bool ok;
7016   tree scalar_type, vectype;
7017   gimple pattern_stmt;
7018   gimple_seq pattern_def_seq;
7019
7020   if (dump_enabled_p ())
7021     {
7022       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7023       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7024     }
7025
7026   if (gimple_has_volatile_ops (stmt))
7027     {
7028       if (dump_enabled_p ())
7029         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7030                          "not vectorized: stmt has volatile operands\n");
7031
7032       return false;
7033     }
7034
7035   /* Skip stmts that do not need to be vectorized.  In loops this is expected
7036      to include:
7037      - the COND_EXPR which is the loop exit condition
7038      - any LABEL_EXPRs in the loop
7039      - computations that are used only for array indexing or loop control.
7040      In basic blocks we only analyze statements that are a part of some SLP
7041      instance, therefore, all the statements are relevant.
7042
7043      A pattern statement needs to be analyzed instead of the original
7044      statement if the original statement is not relevant.  Otherwise, we
7045      analyze both statements.  In basic blocks we are called from some SLP
7046      instance traversal; there, don't analyze pattern stmts instead, as the
7047      pattern stmts will already be part of the SLP instance.  */
7048
7049   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7050   if (!STMT_VINFO_RELEVANT_P (stmt_info)
7051       && !STMT_VINFO_LIVE_P (stmt_info))
7052     {
7053       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7054           && pattern_stmt
7055           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7056               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7057         {
7058           /* Analyze PATTERN_STMT instead of the original stmt.
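              E.g. (illustrative) when the widening-multiply pattern has
              been recognized in

                S1: t_1 = (int) c_2;
                S2: t_3 = (int) c_4;
                S3: r_5 = t_1 * t_3;

              the pattern stmt  r' = c_2 w* c_4  related to S3 is the one
              analyzed here, assuming S1/S2 themselves are not relevant.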
*/ 7059 stmt = pattern_stmt; 7060 stmt_info = vinfo_for_stmt (pattern_stmt); 7061 if (dump_enabled_p ()) 7062 { 7063 dump_printf_loc (MSG_NOTE, vect_location, 7064 "==> examining pattern statement: "); 7065 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 7066 } 7067 } 7068 else 7069 { 7070 if (dump_enabled_p ()) 7071 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n"); 7072 7073 return true; 7074 } 7075 } 7076 else if (STMT_VINFO_IN_PATTERN_P (stmt_info) 7077 && node == NULL 7078 && pattern_stmt 7079 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) 7080 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) 7081 { 7082 /* Analyze PATTERN_STMT too. */ 7083 if (dump_enabled_p ()) 7084 { 7085 dump_printf_loc (MSG_NOTE, vect_location, 7086 "==> examining pattern statement: "); 7087 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 7088 } 7089 7090 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) 7091 return false; 7092 } 7093 7094 if (is_pattern_stmt_p (stmt_info) 7095 && node == NULL 7096 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) 7097 { 7098 gimple_stmt_iterator si; 7099 7100 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si)) 7101 { 7102 gimple pattern_def_stmt = gsi_stmt (si); 7103 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) 7104 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))) 7105 { 7106 /* Analyze def stmt of STMT if it's a pattern stmt. */ 7107 if (dump_enabled_p ()) 7108 { 7109 dump_printf_loc (MSG_NOTE, vect_location, 7110 "==> examining pattern def statement: "); 7111 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0); 7112 } 7113 7114 if (!vect_analyze_stmt (pattern_def_stmt, 7115 need_to_vectorize, node)) 7116 return false; 7117 } 7118 } 7119 } 7120 7121 switch (STMT_VINFO_DEF_TYPE (stmt_info)) 7122 { 7123 case vect_internal_def: 7124 break; 7125 7126 case vect_reduction_def: 7127 case vect_nested_cycle: 7128 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer 7129 || relevance == vect_used_in_outer_by_reduction 7130 || relevance == vect_unused_in_scope)); 7131 break; 7132 7133 case vect_induction_def: 7134 case vect_constant_def: 7135 case vect_external_def: 7136 case vect_unknown_def_type: 7137 default: 7138 gcc_unreachable (); 7139 } 7140 7141 if (bb_vinfo) 7142 { 7143 gcc_assert (PURE_SLP_STMT (stmt_info)); 7144 7145 scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); 7146 if (dump_enabled_p ()) 7147 { 7148 dump_printf_loc (MSG_NOTE, vect_location, 7149 "get vectype for scalar type: "); 7150 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); 7151 dump_printf (MSG_NOTE, "\n"); 7152 } 7153 7154 vectype = get_vectype_for_scalar_type (scalar_type); 7155 if (!vectype) 7156 { 7157 if (dump_enabled_p ()) 7158 { 7159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7160 "not SLPed: unsupported data-type "); 7161 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 7162 scalar_type); 7163 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 7164 } 7165 return false; 7166 } 7167 7168 if (dump_enabled_p ()) 7169 { 7170 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); 7171 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); 7172 dump_printf (MSG_NOTE, "\n"); 7173 } 7174 7175 STMT_VINFO_VECTYPE (stmt_info) = vectype; 7176 } 7177 7178 if (STMT_VINFO_RELEVANT_P (stmt_info)) 7179 { 7180 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); 7181 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) 7182 || (is_gimple_call (stmt) 7183 && gimple_call_lhs (stmt) == NULL_TREE)); 7184 
*need_to_vectorize = true; 7185 } 7186 7187 ok = true; 7188 if (!bb_vinfo 7189 && (STMT_VINFO_RELEVANT_P (stmt_info) 7190 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 7191 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL) 7192 || vectorizable_conversion (stmt, NULL, NULL, NULL) 7193 || vectorizable_shift (stmt, NULL, NULL, NULL) 7194 || vectorizable_operation (stmt, NULL, NULL, NULL) 7195 || vectorizable_assignment (stmt, NULL, NULL, NULL) 7196 || vectorizable_load (stmt, NULL, NULL, NULL, NULL) 7197 || vectorizable_call (stmt, NULL, NULL, NULL) 7198 || vectorizable_store (stmt, NULL, NULL, NULL) 7199 || vectorizable_reduction (stmt, NULL, NULL, NULL) 7200 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); 7201 else 7202 { 7203 if (bb_vinfo) 7204 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node) 7205 || vectorizable_conversion (stmt, NULL, NULL, node) 7206 || vectorizable_shift (stmt, NULL, NULL, node) 7207 || vectorizable_operation (stmt, NULL, NULL, node) 7208 || vectorizable_assignment (stmt, NULL, NULL, node) 7209 || vectorizable_load (stmt, NULL, NULL, node, NULL) 7210 || vectorizable_call (stmt, NULL, NULL, node) 7211 || vectorizable_store (stmt, NULL, NULL, node) 7212 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); 7213 } 7214 7215 if (!ok) 7216 { 7217 if (dump_enabled_p ()) 7218 { 7219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7220 "not vectorized: relevant stmt not "); 7221 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: "); 7222 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); 7223 } 7224 7225 return false; 7226 } 7227 7228 if (bb_vinfo) 7229 return true; 7230 7231 /* Stmts that are (also) "live" (i.e. - that are used out of the loop) 7232 need extra handling, except for vectorizable reductions. */ 7233 if (STMT_VINFO_LIVE_P (stmt_info) 7234 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type) 7235 ok = vectorizable_live_operation (stmt, NULL, NULL); 7236 7237 if (!ok) 7238 { 7239 if (dump_enabled_p ()) 7240 { 7241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7242 "not vectorized: live stmt not "); 7243 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: "); 7244 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); 7245 } 7246 7247 return false; 7248 } 7249 7250 return true; 7251} 7252 7253 7254/* Function vect_transform_stmt. 7255 7256 Create a vectorized stmt to replace STMT, and insert it at BSI. 

/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
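
/* Example (illustration only): the transformation driver,
   vect_transform_loop, consumes IS_STORE and the GROUPED_STORE flag
   roughly as sketched below (details simplified):

     bool grouped_store = false;
     bool is_store = vect_transform_stmt (stmt, &si, &grouped_store,
					  NULL, NULL);
     if (is_store)
       {
	 if (grouped_store)
	   /* The whole interleaved chain was emitted when the last store
	      in the group was reached; drop all its scalar stores.  */
	   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
	 else
	   /* A single store: remove just this scalar stmt.  */
	   gsi_remove (&si, true);
       }
*/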

/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
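
/* Illustration (not from the sources): for an interleaved store group
   such as

     a[2*i]   = x;   <-- GROUP_FIRST_ELEMENT of the group
     a[2*i+1] = y;   <-- GROUP_NEXT_ELEMENT of the first store

   calling vect_remove_stores on the first element walks the
   GROUP_NEXT_ELEMENT chain and deletes each scalar store once its
   vectorized replacement has been emitted.  */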

/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create the global vector that maps statement UIDs to stmt_vec_info
   structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector that maps statement UIDs to stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
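
/* Example (illustration only): a stmt_vec_info becomes visible to
   vinfo_for_stmt by registering it with set_vinfo_for_stmt, which files
   it in stmt_vec_info_vec under the stmt's UID.  A typical, slightly
   simplified life cycle, as done when a loop_vec_info is built:

     init_stmt_vec_info_vec ();
     ...
     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));
     ...
     free_stmt_vec_info_vec ();   /- Also frees each stmt_vec_info.  -/
*/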

/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE that has the same
   size as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
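
/* Example (illustration only): on a target whose preferred SIMD mode for
   SImode is V4SImode (16 bytes, e.g. SSE2), the first call below also
   latches current_vector_size to 16, so the second request yields the
   same-sized V8HImode vector:

     tree v4si = get_vectype_for_scalar_type (intSI_type_node);
     tree v8hi = get_vectype_for_scalar_type (short_integer_type_node);
*/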

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
	  && *dt == vect_double_reduction_def
	  && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
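
/* Example (illustration only): a typical vectorizable_* routine
   classifies each operand before committing to vectorize, roughly:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
			      &def_stmt, &def, &dt))
       return false;   /- Operand defined in an unsupported way.  -/
     if (dt == vect_constant_def || dt == vect_external_def)
       ;  /- Invariant operand: it can be broadcast outside the loop.  -/
*/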

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
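
/* Example (illustration only): callers that also need the operand's
   vector type -- e.g. to build a vectorized invariant -- use the _1
   variant and fall back to choosing a type themselves:

     tree vectype_in;
     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				&def_stmt, &def, &dt, &vectype_in))
       return false;
     if (!vectype_in)   /- Constant or external def.  -/
       vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
*/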

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd},
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
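
/* Example (illustration only, hypothetical type names): for a char->int
   conversion on a little-endian target that unpacks one step at a time,
   a caller such as vectorizable_conversion would observe:

     enum tree_code code1, code2;
     int steps;
     vec<tree> interm_types = vNULL;
     if (supportable_widening_operation (NOP_EXPR, stmt, v4si_type,
					 v16qi_type, &code1, &code2,
					 &steps, &interm_types))
       /- steps == 1, interm_types holds the intermediate short vector
	  type, and code1/code2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR
	  (the pair is swapped on big-endian targets).  -/
       ...;
*/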

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
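
/* Example (illustration only, hypothetical type names): mirroring the
   widening case, an int->char narrowing on a target that packs one step
   at a time reports a single intermediate step:

     enum tree_code code1;
     int steps;
     vec<tree> interm_types = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, v16qi_type, v4si_type,
					  &code1, &steps, &interm_types))
       /- code1 == VEC_PACK_TRUNC_EXPR, steps == 1, and interm_types holds
	  the intermediate short vector type.  -/
       ...;
*/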