/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

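/* Illustration (a sketch, not IL emitted verbatim): create_vector_array
   above and read_vector_array/write_vector_array below cooperate when a
   grouped access is implemented with load/store-lanes.  For a group of
   two vectors the generated IL looks roughly like:

       vect_array = LOAD_LANES (*ptr);     <-- fills ELEM_TYPE[2]
       vx.0_1 = vect_array[0];             <-- read_vector_array
       vx.1_2 = vect_array[1];             <-- read_vector_array

   The store side is symmetric: write_vector_array fills the array
   element by element before a single STORE_LANES consumes it.  */
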
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

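/* Example (a sketch): for the store  a[i_1] = x_2;  the use i_1 only
   feeds the array index, so the function above returns false for it,
   whereas the stored value x_2 yields true.  Uses that fail this test
   are skipped by process_use below unless FORCE is set.  */
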
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.");
	      worklist.release ();
	      return false;
	    }

	  live_p = false;
	  break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.");

              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.");

              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		}
	    }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    {
	      worklist.release ();
	      return false;
	    }
	}
    } /* while worklist */

  worklist.release ();
  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

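/* Worked example (a sketch): with ncopies == 2 and one constant or
   invariant operand, the function above records one vector_stmt in the
   prologue (the invariant vector is built only once, outside the loop)
   plus two vector_stmt copies in the loop body.  */
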
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

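/* Worked example (a sketch): for a two-step promotion (PWR == 1) the
   loop above costs vect_pow2 (1) + vect_pow2 (2) == 2 + 4 == 6
   vec_promote_demote stmts, while the corresponding demotion costs
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2 == 3, reflecting that each
   promotion step doubles the number of instructions required.  */
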
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

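/* Worked example (a sketch): a group of 4 interleaved stores without
   store-lanes costs exact_log2 (4) * 4 == 8 vec_perm stmts per copy
   above (a high and a low interleave at each of the two levels); the
   vector stores themselves are costed separately, one per group
   member, via vect_get_store_cost.  */
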
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
          }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

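/* Example (a sketch): for a scalar invariant OP and a 4-element vector
   TYPE, vect_init_vector (stmt, op, type, NULL) emits in the loop
   preheader roughly

       cst_1 = {op, op, op, op};

   and returns cst_1.  If the element types differ, a NOP_EXPR
   conversion (or a VIEW_CONVERT_EXPR for constants) is emitted
   first.  */
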
nunits = %d", nunits); 1387 1388 return vect_init_vector (stmt, op, vector_type, NULL); 1389 } 1390 1391 /* Case 2: operand is defined outside the loop - loop invariant. */ 1392 case vect_external_def: 1393 { 1394 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def)); 1395 gcc_assert (vector_type); 1396 1397 if (scalar_def) 1398 *scalar_def = def; 1399 1400 /* Create 'vec_inv = {inv,inv,..,inv}' */ 1401 if (dump_enabled_p ()) 1402 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv."); 1403 1404 return vect_init_vector (stmt, def, vector_type, NULL); 1405 } 1406 1407 /* Case 3: operand is defined inside the loop. */ 1408 case vect_internal_def: 1409 { 1410 if (scalar_def) 1411 *scalar_def = NULL/* FIXME tuples: def_stmt*/; 1412 1413 /* Get the def from the vectorized stmt. */ 1414 def_stmt_info = vinfo_for_stmt (def_stmt); 1415 1416 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 1417 /* Get vectorized pattern statement. */ 1418 if (!vec_stmt 1419 && STMT_VINFO_IN_PATTERN_P (def_stmt_info) 1420 && !STMT_VINFO_RELEVANT (def_stmt_info)) 1421 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( 1422 STMT_VINFO_RELATED_STMT (def_stmt_info))); 1423 gcc_assert (vec_stmt); 1424 if (gimple_code (vec_stmt) == GIMPLE_PHI) 1425 vec_oprnd = PHI_RESULT (vec_stmt); 1426 else if (is_gimple_call (vec_stmt)) 1427 vec_oprnd = gimple_call_lhs (vec_stmt); 1428 else 1429 vec_oprnd = gimple_assign_lhs (vec_stmt); 1430 return vec_oprnd; 1431 } 1432 1433 /* Case 4: operand is defined by a loop header phi - reduction */ 1434 case vect_reduction_def: 1435 case vect_double_reduction_def: 1436 case vect_nested_cycle: 1437 { 1438 struct loop *loop; 1439 1440 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1441 loop = (gimple_bb (def_stmt))->loop_father; 1442 1443 /* Get the def before the loop */ 1444 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 1445 return get_initial_def_for_reduction (stmt, op, scalar_def); 1446 } 1447 1448 /* Case 5: operand is defined by loop-header phi - induction. */ 1449 case vect_induction_def: 1450 { 1451 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1452 1453 /* Get the def from the vectorized stmt. */ 1454 def_stmt_info = vinfo_for_stmt (def_stmt); 1455 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 1456 if (gimple_code (vec_stmt) == GIMPLE_PHI) 1457 vec_oprnd = PHI_RESULT (vec_stmt); 1458 else 1459 vec_oprnd = gimple_get_lhs (vec_stmt); 1460 return vec_oprnd; 1461 } 1462 1463 default: 1464 gcc_unreachable (); 1465 } 1466} 1467 1468 1469/* Function vect_get_vec_def_for_stmt_copy 1470 1471 Return a vector-def for an operand. This function is used when the 1472 vectorized stmt to be created (by the caller to this function) is a "copy" 1473 created in case the vectorized result cannot fit in one vector, and several 1474 copies of the vector-stmt are required. In this case the vector-def is 1475 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field 1476 of the stmt that defines VEC_OPRND. 1477 DT is the type of the vector def VEC_OPRND. 1478 1479 Context: 1480 In case the vectorization factor (VF) is bigger than the number 1481 of elements that can fit in a vectype (nunits), we have to generate 1482 more than one vector stmt to vectorize the scalar stmt. 
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:         STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

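/* Typical calling pattern (a sketch): for the first copy (j == 0) a
   vectorizable_* routine calls

       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			  slp_node, -1);

   and for each subsequent copy (j > 0)

       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

   which steps each operand forward through STMT_VINFO_RELATED_STMT.  */
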
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

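/* Example (a sketch, target-dependent): for a call to a sqrt builtin in
   a loop vectorized with a 2-element double vector, the hook
   targetm.vectorize.builtin_vectorized_function may return a decl for a
   vector sqrt builtin; vectorizable_call below then replaces the scalar
   call with a call to that decl.  A NULL_TREE return means the call
   cannot be vectorized this way.  */
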
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

		  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      vec<vec<tree> > vec_defs;
	      vec_defs.create (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      vec_defs.release ();
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      vec<vec<tree> > vec_defs;
	      vec_defs.create (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.
		 */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      vec_defs.release ();
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.
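
   For example (illustrative, not tied to a particular target): widening
   a multiplication of two V8HI vectors into V4SI results typically uses a
   VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR pair; this function
   emits one of those two halves per call, and the caller invokes it once
   for each half.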
   */

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   a scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get the first vector operand.  All the vector operands except the very
     first one (that is the scalar oprnd) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get the second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.
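  For example (illustrative): with
	 CODE == VEC_PACK_TRUNC_EXPR, two V4SI operands are packed into one
	 V8HI result, so each iteration consumes two input vectors and
	 produces a single narrower one.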
	 */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for the next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in the vector info of the scalar statement
	     (or in the STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of the promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.
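
   As an illustrative example: float -> double on V4SF input is a widening
   conversion (each V4SF operand yields two V2DF results), double -> float
   is a narrowing one (two V2DF operands per V4SF result), and int ->
   float between equally populated vector types needs no modifier at
   all.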
   */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.
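  (For example -- illustrative only: in int_res = 3 * short_a
	 recognized as a WIDEN_MULT_EXPR, the constant 3 carries no useful
	 narrow vector type of its own, so the vector type is taken from
	 the non-constant operand OP1 instead.)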
	 */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.");
	  return false;
	}
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.
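  That is, MULTI_STEP_CVT must be zero here: the
	     multi-step scheme below applies only to unary FLOAT_EXPR
	     conversions, where intermediate integer widenings can be
	     chained before the final int->float conversion.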
	     */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (2 * (multi_step_cvt
				 ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
							   vop0, NULL);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.
  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest, NULL);
		      new_stmt = gimple_build_assign_with_ops (codecvt1,
							       new_temp,
							       vop0, NULL);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.
	     */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest, NULL);
		    new_stmt = gimple_build_assign_with_ops (codecvt1,
							     new_temp,
							     vop0, NULL);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is this a vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.
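  For example (illustrative): a
     sign-change such as unsigned int -> int keeps both nunits and the
     vector size, so it is vectorized here as a plain copy through a
     VIEW_CONVERT_EXPR; int -> short halves the element size and is
     rejected below, being a real conversion instead.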
     */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.
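
   As an illustrative example: for LSHIFT_EXPR on int this asks first for
   the vector-shifted-by-scalar optab (ashl_optab, e.g. an ashlv4si3
   pattern) and then for the vector-shifted-by-vector one (vashl_optab,
   e.g. vashlv4si3); supporting either form is enough.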
   */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}


/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
		    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.
     */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type ");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			     &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same in all
	 the stmts; in loops, if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.
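  (On some targets
     only one of the two exists -- e.g. SSE2 can only shift all elements
     by one common count, while per-element variable shifts need AVX2 --
     hence this preference order; an illustrative note, not an exhaustive
     list.)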
     */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).
  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kinds of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.
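
   As an illustrative example: z = x + y over int elements becomes one
   PLUS_EXPR on V4SI operands per copy, and a ternary FMA_EXPR is handled
   here as well, with three vectorized operands per new stmt.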
   */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary and ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are the bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.
     */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
	icode = LAST_INSN_CODE;
      else
	icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.");
	  return false;
	}
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.
		  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.
*/ 3708 if (j == 0) 3709 { 3710 if (op_type == binary_op || op_type == ternary_op) 3711 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 3712 slp_node, -1); 3713 else 3714 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 3715 slp_node, -1); 3716 if (op_type == ternary_op) 3717 { 3718 vec_oprnds2.create (1); 3719 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2, 3720 stmt, 3721 NULL)); 3722 } 3723 } 3724 else 3725 { 3726 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 3727 if (op_type == ternary_op) 3728 { 3729 tree vec_oprnd = vec_oprnds2.pop (); 3730 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2], 3731 vec_oprnd)); 3732 } 3733 } 3734 3735 /* Arguments are ready. Create the new vector stmt. */ 3736 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 3737 { 3738 vop1 = ((op_type == binary_op || op_type == ternary_op) 3739 ? vec_oprnds1[i] : NULL_TREE); 3740 vop2 = ((op_type == ternary_op) 3741 ? vec_oprnds2[i] : NULL_TREE); 3742 new_stmt = gimple_build_assign_with_ops (code, vec_dest, 3743 vop0, vop1, vop2); 3744 new_temp = make_ssa_name (vec_dest, new_stmt); 3745 gimple_assign_set_lhs (new_stmt, new_temp); 3746 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3747 if (slp_node) 3748 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3749 } 3750 3751 if (slp_node) 3752 continue; 3753 3754 if (j == 0) 3755 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3756 else 3757 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3758 prev_stmt_info = vinfo_for_stmt (new_stmt); 3759 } 3760 3761 vec_oprnds0.release (); 3762 vec_oprnds1.release (); 3763 vec_oprnds2.release (); 3764 3765 return true; 3766} 3767 3768 3769/* Function vectorizable_store. 3770 3771 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that 3772 can be vectorized. 3773 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3774 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 3775 Return FALSE if not a vectorizable STMT, TRUE otherwise.
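As a minimal illustration (an assumed example, with a hypothetical array A and loop-invariant X), the scalar stmt handled here is the store in: for (i = 0; i < n; i++) A[i] = X; which, assuming four ints per vector (nunits == 4), becomes one V4SI store of {X, X, X, X} per vector iteration.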
*/ 3776 3777static bool 3778vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 3779 slp_tree slp_node) 3780{ 3781 tree scalar_dest; 3782 tree data_ref; 3783 tree op; 3784 tree vec_oprnd = NULL_TREE; 3785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3786 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; 3787 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3788 tree elem_type; 3789 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3790 struct loop *loop = NULL; 3791 enum machine_mode vec_mode; 3792 tree dummy; 3793 enum dr_alignment_support alignment_support_scheme; 3794 tree def; 3795 gimple def_stmt; 3796 enum vect_def_type dt; 3797 stmt_vec_info prev_stmt_info = NULL; 3798 tree dataref_ptr = NULL_TREE; 3799 gimple ptr_incr = NULL; 3800 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 3801 int ncopies; 3802 int j; 3803 gimple next_stmt, first_stmt = NULL; 3804 bool grouped_store = false; 3805 bool store_lanes_p = false; 3806 unsigned int group_size, i; 3807 vec<tree> dr_chain = vNULL; 3808 vec<tree> oprnds = vNULL; 3809 vec<tree> result_chain = vNULL; 3810 bool inv_p; 3811 vec<tree> vec_oprnds = vNULL; 3812 bool slp = (slp_node != NULL); 3813 unsigned int vec_num; 3814 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3815 tree aggr_type; 3816 3817 if (loop_vinfo) 3818 loop = LOOP_VINFO_LOOP (loop_vinfo); 3819 3820 /* Multiple types in SLP are handled by creating the appropriate number of 3821 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3822 case of SLP. */ 3823 if (slp || PURE_SLP_STMT (stmt_info)) 3824 ncopies = 1; 3825 else 3826 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 3827 3828 gcc_assert (ncopies >= 1); 3829 3830 /* FORNOW. This restriction should be relaxed. */ 3831 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1) 3832 { 3833 if (dump_enabled_p ()) 3834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3835 "multiple types in nested loop."); 3836 return false; 3837 } 3838 3839 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3840 return false; 3841 3842 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 3843 return false; 3844 3845 /* Is vectorizable store? */ 3846 3847 if (!is_gimple_assign (stmt)) 3848 return false; 3849 3850 scalar_dest = gimple_assign_lhs (stmt); 3851 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR 3852 && is_pattern_stmt_p (stmt_info)) 3853 scalar_dest = TREE_OPERAND (scalar_dest, 0); 3854 if (TREE_CODE (scalar_dest) != ARRAY_REF 3855 && TREE_CODE (scalar_dest) != INDIRECT_REF 3856 && TREE_CODE (scalar_dest) != COMPONENT_REF 3857 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 3858 && TREE_CODE (scalar_dest) != REALPART_EXPR 3859 && TREE_CODE (scalar_dest) != MEM_REF) 3860 return false; 3861 3862 gcc_assert (gimple_assign_single_p (stmt)); 3863 op = gimple_assign_rhs1 (stmt); 3864 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt, 3865 &def, &dt)) 3866 { 3867 if (dump_enabled_p ()) 3868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3869 "use not simple."); 3870 return false; 3871 } 3872 3873 elem_type = TREE_TYPE (vectype); 3874 vec_mode = TYPE_MODE (vectype); 3875 3876 /* FORNOW. In some cases can vectorize even if data-type not supported 3877 (e.g. - array initialization with 0). 
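As it stands, though, the mov_optab check below still rejects any case in which the target cannot even move a vector of this mode to and from memory.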
*/ 3878 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) 3879 return false; 3880 3881 if (!STMT_VINFO_DATA_REF (stmt_info)) 3882 return false; 3883 3884 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) 3885 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), 3886 size_zero_node) < 0) 3887 { 3888 if (dump_enabled_p ()) 3889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3890 "negative step for store."); 3891 return false; 3892 } 3893 3894 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 3895 { 3896 grouped_store = true; 3897 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 3898 if (!slp && !PURE_SLP_STMT (stmt_info)) 3899 { 3900 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 3901 if (vect_store_lanes_supported (vectype, group_size)) 3902 store_lanes_p = true; 3903 else if (!vect_grouped_store_supported (vectype, group_size)) 3904 return false; 3905 } 3906 3907 if (first_stmt == stmt) 3908 { 3909 /* STMT is the leader of the group. Check the operands of all the 3910 stmts of the group. */ 3911 next_stmt = GROUP_NEXT_ELEMENT (stmt_info); 3912 while (next_stmt) 3913 { 3914 gcc_assert (gimple_assign_single_p (next_stmt)); 3915 op = gimple_assign_rhs1 (next_stmt); 3916 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo, 3917 &def_stmt, &def, &dt)) 3918 { 3919 if (dump_enabled_p ()) 3920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3921 "use not simple."); 3922 return false; 3923 } 3924 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 3925 } 3926 } 3927 } 3928 3929 if (!vec_stmt) /* transformation not required. */ 3930 { 3931 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 3932 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, 3933 NULL, NULL, NULL); 3934 return true; 3935 } 3936 3937 /** Transform. **/ 3938 3939 if (grouped_store) 3940 { 3941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 3942 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 3943 3944 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++; 3945 3946 /* FORNOW */ 3947 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt)); 3948 3949 /* We vectorize all the stmts of the interleaving group when we 3950 reach the last stmt in the group. */ 3951 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt)) 3952 < GROUP_SIZE (vinfo_for_stmt (first_stmt)) 3953 && !slp) 3954 { 3955 *vec_stmt = NULL; 3956 return true; 3957 } 3958 3959 if (slp) 3960 { 3961 grouped_store = false; 3962 /* VEC_NUM is the number of vect stmts to be created for this 3963 group. */ 3964 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 3965 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 3966 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 3967 op = gimple_assign_rhs1 (first_stmt); 3968 } 3969 else 3970 /* VEC_NUM is the number of vect stmts to be created for this 3971 group. */ 3972 vec_num = group_size; 3973 } 3974 else 3975 { 3976 first_stmt = stmt; 3977 first_dr = dr; 3978 group_size = vec_num = 1; 3979 } 3980 3981 if (dump_enabled_p ()) 3982 dump_printf_loc (MSG_NOTE, vect_location, 3983 "transform store. ncopies = %d", ncopies); 3984 3985 dr_chain.create (group_size); 3986 oprnds.create (group_size); 3987 3988 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 3989 gcc_assert (alignment_support_scheme); 3990 /* Targets with store-lane instructions must not require explicit 3991 realignment. 
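(Store-lane instructions, e.g. ARM NEON's vst2/vst3/vst4, store a group of vectors with built-in interleaving in a single instruction; accordingly AGGR_TYPE below becomes an ELEM_TYPE[vec_num * nunits] array rather than a single vectype.)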
*/ 3992 gcc_assert (!store_lanes_p 3993 || alignment_support_scheme == dr_aligned 3994 || alignment_support_scheme == dr_unaligned_supported); 3995 3996 if (store_lanes_p) 3997 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 3998 else 3999 aggr_type = vectype; 4000 4001 /* In case the vectorization factor (VF) is bigger than the number 4002 of elements that we can fit in a vectype (nunits), we have to generate 4003 more than one vector stmt - i.e - we need to "unroll" the 4004 vector stmt by a factor VF/nunits. For more details see documentation in 4005 vect_get_vec_def_for_stmt_copy. */ 4006 4007 /* In case of interleaving (non-unit grouped access): 4008 4009 S1: &base + 2 = x2 4010 S2: &base = x0 4011 S3: &base + 1 = x1 4012 S4: &base + 3 = x3 4013 4014 We create vectorized stores starting from the base address (the access of 4015 the first stmt in the chain, S2 in the above example), when the last store 4016 stmt of the chain (S4) is reached: 4017 4018 VS1: &base = vx2 4019 VS2: &base + vec_size*1 = vx0 4020 VS3: &base + vec_size*2 = vx1 4021 VS4: &base + vec_size*3 = vx3 4022 4023 Then permutation statements are generated: 4024 4025 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > 4026 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > 4027 ... 4028 4029 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 4030 (the order of the data-refs in the output of vect_permute_store_chain 4031 corresponds to the order of scalar stmts in the interleaving chain - see 4032 the documentation of vect_permute_store_chain()). 4033 4034 In case of both multiple types and interleaving, the above vector stores 4035 and permutation stmts are created for every copy. The result vector stmts 4036 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 4037 STMT_VINFO_RELATED_STMT for the next copies. 4038 */ 4039 4040 prev_stmt_info = NULL; 4041 for (j = 0; j < ncopies; j++) 4042 { 4043 gimple new_stmt; 4044 4045 if (j == 0) 4046 { 4047 if (slp) 4048 { 4049 /* Get vectorized arguments for SLP_NODE. */ 4050 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, 4051 NULL, slp_node, -1); 4052 4053 vec_oprnd = vec_oprnds[0]; 4054 } 4055 else 4056 { 4057 /* For interleaved stores we collect vectorized defs for all the 4058 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then 4059 used as an input to vect_permute_store_chain(), and OPRNDS as 4060 an input to vect_get_vec_def_for_stmt_copy() for the next copy. 4061 4062 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and 4063 OPRNDS are of size 1. */ 4064 next_stmt = first_stmt; 4065 for (i = 0; i < group_size; i++) 4066 { 4067 /* Since gaps are not supported for interleaved stores, 4068 GROUP_SIZE is the exact number of stmts in the chain. 4069 Therefore, NEXT_STMT can't be NULL. In case that 4070 there is no interleaving, GROUP_SIZE is 1, and only one 4071 iteration of the loop will be executed. */ 4072 gcc_assert (next_stmt 4073 && gimple_assign_single_p (next_stmt)); 4074 op = gimple_assign_rhs1 (next_stmt); 4075 4076 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, 4077 NULL); 4078 dr_chain.quick_push (vec_oprnd); 4079 oprnds.quick_push (vec_oprnd); 4080 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 4081 } 4082 } 4083 4084 /* We should have caught mismatched types earlier.
*/ 4085 gcc_assert (useless_type_conversion_p (vectype, 4086 TREE_TYPE (vec_oprnd))); 4087 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL, 4088 NULL_TREE, &dummy, gsi, 4089 &ptr_incr, false, &inv_p); 4090 gcc_assert (bb_vinfo || !inv_p); 4091 } 4092 else 4093 { 4094 /* For interleaved stores we created vectorized defs for all the 4095 defs stored in OPRNDS in the previous iteration (previous copy). 4096 DR_CHAIN is then used as an input to vect_permute_store_chain(), 4097 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 4098 next copy. 4099 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and 4100 OPRNDS are of size 1. */ 4101 for (i = 0; i < group_size; i++) 4102 { 4103 op = oprnds[i]; 4104 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt, 4105 &def, &dt); 4106 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op); 4107 dr_chain[i] = vec_oprnd; 4108 oprnds[i] = vec_oprnd; 4109 } 4110 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 4111 TYPE_SIZE_UNIT (aggr_type)); 4112 } 4113 4114 if (store_lanes_p) 4115 { 4116 tree vec_array; 4117 4118 /* Combine all the vectors into an array. */ 4119 vec_array = create_vector_array (vectype, vec_num); 4120 for (i = 0; i < vec_num; i++) 4121 { 4122 vec_oprnd = dr_chain[i]; 4123 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); 4124 } 4125 4126 /* Emit: 4127 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ 4128 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); 4129 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); 4130 gimple_call_set_lhs (new_stmt, data_ref); 4131 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4132 } 4133 else 4134 { 4135 new_stmt = NULL; 4136 if (grouped_store) 4137 { 4138 if (j == 0) 4139 result_chain.create (group_size); 4140 /* Permute. */ 4141 vect_permute_store_chain (dr_chain, group_size, stmt, gsi, 4142 &result_chain); 4143 } 4144 4145 next_stmt = first_stmt; 4146 for (i = 0; i < vec_num; i++) 4147 { 4148 unsigned align, misalign; 4149 4150 if (i > 0) 4151 /* Bump the vector pointer. */ 4152 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 4153 stmt, NULL_TREE); 4154 4155 if (slp) 4156 vec_oprnd = vec_oprnds[i]; 4157 else if (grouped_store) 4158 /* For grouped stores vectorized defs are interleaved in 4159 vect_permute_store_chain(). */ 4160 vec_oprnd = result_chain[i]; 4161 4162 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, 4163 build_int_cst (reference_alias_ptr_type 4164 (DR_REF (first_dr)), 0)); 4165 align = TYPE_ALIGN_UNIT (vectype); 4166 if (aligned_access_p (first_dr)) 4167 misalign = 0; 4168 else if (DR_MISALIGNMENT (first_dr) == -1) 4169 { 4170 TREE_TYPE (data_ref) 4171 = build_aligned_type (TREE_TYPE (data_ref), 4172 TYPE_ALIGN (elem_type)); 4173 align = TYPE_ALIGN_UNIT (elem_type); 4174 misalign = 0; 4175 } 4176 else 4177 { 4178 TREE_TYPE (data_ref) 4179 = build_aligned_type (TREE_TYPE (data_ref), 4180 TYPE_ALIGN (elem_type)); 4181 misalign = DR_MISALIGNMENT (first_dr); 4182 } 4183 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 4184 misalign); 4185 4186 /* Arguments are ready. Create the new vector stmt. 
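For a plain contiguous store this is one vector assignment through DATA_REF (a MEM_REF built on DATAREF_PTR) per vector in the group, with the alignment info attached to the pointer just above.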
*/ 4187 new_stmt = gimple_build_assign (data_ref, vec_oprnd); 4188 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4189 4190 if (slp) 4191 continue; 4192 4193 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 4194 if (!next_stmt) 4195 break; 4196 } 4197 } 4198 if (!slp) 4199 { 4200 if (j == 0) 4201 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4202 else 4203 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4204 prev_stmt_info = vinfo_for_stmt (new_stmt); 4205 } 4206 } 4207 4208 dr_chain.release (); 4209 oprnds.release (); 4210 result_chain.release (); 4211 vec_oprnds.release (); 4212 4213 return true; 4214} 4215 4216/* Given a vector type VECTYPE and a permutation SEL, returns 4217 the VECTOR_CST mask that implements the permutation of the 4218 vector elements. If that is impossible to do, returns NULL. */ 4219 4220tree 4221vect_gen_perm_mask (tree vectype, unsigned char *sel) 4222{ 4223 tree mask_elt_type, mask_type, mask_vec, *mask_elts; 4224 int i, nunits; 4225 4226 nunits = TYPE_VECTOR_SUBPARTS (vectype); 4227 4228 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) 4229 return NULL; 4230 4231 mask_elt_type = lang_hooks.types.type_for_mode 4232 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1); 4233 mask_type = get_vectype_for_scalar_type (mask_elt_type); 4234 4235 mask_elts = XALLOCAVEC (tree, nunits); 4236 for (i = nunits - 1; i >= 0; i--) 4237 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]); 4238 mask_vec = build_vector (mask_type, mask_elts); 4239 4240 return mask_vec; 4241} 4242 4243/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements 4244 reversal of the vector elements. If that is impossible to do, 4245 returns NULL. */ 4246 4247static tree 4248perm_mask_for_reverse (tree vectype) 4249{ 4250 int i, nunits; 4251 unsigned char *sel; 4252 4253 nunits = TYPE_VECTOR_SUBPARTS (vectype); 4254 sel = XALLOCAVEC (unsigned char, nunits); 4255 4256 for (i = 0; i < nunits; ++i) 4257 sel[i] = nunits - 1 - i; 4258 4259 return vect_gen_perm_mask (vectype, sel); 4260} 4261 4262/* Given vector variables X and Y that were generated for the scalar 4263 STMT, generate instructions to permute the vector elements of X and Y 4264 using permutation mask MASK_VEC, insert them at *GSI and return the 4265 permuted vector variable. */ 4266 4267static tree 4268permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt, 4269 gimple_stmt_iterator *gsi) 4270{ 4271 tree vectype = TREE_TYPE (x); 4272 tree perm_dest, data_ref; 4273 gimple perm_stmt; 4274 4275 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype); 4276 data_ref = make_ssa_name (perm_dest, NULL); 4277 4278 /* Generate the permute statement. */ 4279 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, 4280 x, y, mask_vec); 4281 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 4282 4283 return data_ref; 4284} 4285 4286/* vectorizable_load. 4287 4288 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that 4289 can be vectorized. 4290 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4291 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 4292 Return FALSE if not a vectorizable STMT, TRUE otherwise.
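As a minimal illustration (an assumed example, with hypothetical arrays A and B), the scalar stmt handled here is the load of B[i] in: for (i = 0; i < n; i++) A[i] = B[i]; which, assuming four ints per vector (nunits == 4), becomes one V4SI load per vector iteration (or several, in the grouped and multi-copy cases described below).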
*/ 4293 4294static bool 4295vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 4296 slp_tree slp_node, slp_instance slp_node_instance) 4297{ 4298 tree scalar_dest; 4299 tree vec_dest = NULL; 4300 tree data_ref = NULL; 4301 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4302 stmt_vec_info prev_stmt_info; 4303 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4304 struct loop *loop = NULL; 4305 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 4306 bool nested_in_vect_loop = false; 4307 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; 4308 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4309 tree elem_type; 4310 tree new_temp; 4311 enum machine_mode mode; 4312 gimple new_stmt = NULL; 4313 tree dummy; 4314 enum dr_alignment_support alignment_support_scheme; 4315 tree dataref_ptr = NULL_TREE; 4316 gimple ptr_incr = NULL; 4317 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 4318 int ncopies; 4319 int i, j, group_size; 4320 tree msq = NULL_TREE, lsq; 4321 tree offset = NULL_TREE; 4322 tree byte_offset = NULL_TREE; 4323 tree realignment_token = NULL_TREE; 4324 gimple phi = NULL; 4325 vec<tree> dr_chain = vNULL; 4326 bool grouped_load = false; 4327 bool load_lanes_p = false; 4328 gimple first_stmt; 4329 bool inv_p; 4330 bool negative = false; 4331 bool compute_in_loop = false; 4332 struct loop *at_loop; 4333 int vec_num; 4334 bool slp = (slp_node != NULL); 4335 bool slp_perm = false; 4336 enum tree_code code; 4337 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4338 int vf; 4339 tree aggr_type; 4340 tree gather_base = NULL_TREE, gather_off = NULL_TREE; 4341 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE; 4342 int gather_scale = 1; 4343 enum vect_def_type gather_dt = vect_unknown_def_type; 4344 4345 if (loop_vinfo) 4346 { 4347 loop = LOOP_VINFO_LOOP (loop_vinfo); 4348 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 4349 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 4350 } 4351 else 4352 vf = 1; 4353 4354 /* Multiple types in SLP are handled by creating the appropriate number of 4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4356 case of SLP. */ 4357 if (slp || PURE_SLP_STMT (stmt_info)) 4358 ncopies = 1; 4359 else 4360 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 4361 4362 gcc_assert (ncopies >= 1); 4363 4364 /* FORNOW. This restriction should be relaxed. */ 4365 if (nested_in_vect_loop && ncopies > 1) 4366 { 4367 if (dump_enabled_p ()) 4368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4369 "multiple types in nested loop."); 4370 return false; 4371 } 4372 4373 /* Invalidate assumptions made by dependence analysis when vectorization 4374 on the unrolled body effectively re-orders stmts. */ 4375 if (ncopies > 1 4376 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 4377 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo) 4378 > STMT_VINFO_MIN_NEG_DIST (stmt_info))) 4379 { 4380 if (dump_enabled_p ()) 4381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4382 "cannot perform implicit CSE when unrolling " 4383 "with negative dependence distance\n"); 4384 return false; 4385 } 4386 4387 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4388 return false; 4389 4390 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 4391 return false; 4392 4393 /* Is vectorizable load? 
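The stmt must be a single assignment whose LHS is an SSA name and whose RHS is a memory reference (an ARRAY_REF, MEM_REF, COMPONENT_REF etc.); the checks below enforce exactly that.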
*/ 4394 if (!is_gimple_assign (stmt)) 4395 return false; 4396 4397 scalar_dest = gimple_assign_lhs (stmt); 4398 if (TREE_CODE (scalar_dest) != SSA_NAME) 4399 return false; 4400 4401 code = gimple_assign_rhs_code (stmt); 4402 if (code != ARRAY_REF 4403 && code != INDIRECT_REF 4404 && code != COMPONENT_REF 4405 && code != IMAGPART_EXPR 4406 && code != REALPART_EXPR 4407 && code != MEM_REF 4408 && TREE_CODE_CLASS (code) != tcc_declaration) 4409 return false; 4410 4411 if (!STMT_VINFO_DATA_REF (stmt_info)) 4412 return false; 4413 4414 elem_type = TREE_TYPE (vectype); 4415 mode = TYPE_MODE (vectype); 4416 4417 /* FORNOW. In some cases can vectorize even if data-type not supported 4418 (e.g. - data copies). */ 4419 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) 4420 { 4421 if (dump_enabled_p ()) 4422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4423 "Aligned load, but unsupported type."); 4424 return false; 4425 } 4426 4427 /* Check if the load is a part of an interleaving chain. */ 4428 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 4429 { 4430 grouped_load = true; 4431 /* FORNOW */ 4432 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info)); 4433 4434 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 4435 4436 /* If this is single-element interleaving with an element distance 4437 that leaves unused vector loads around punt - we at least create 4438 very sub-optimal code in that case (and blow up memory, 4439 see PR65518). */ 4440 if (first_stmt == stmt 4441 && !GROUP_NEXT_ELEMENT (stmt_info) 4442 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype)) 4443 { 4444 if (dump_enabled_p ()) 4445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4446 "single-element interleaving not supported " 4447 "for not adjacent vector loads\n"); 4448 return false; 4449 } 4450 4451 if (!slp && !PURE_SLP_STMT (stmt_info)) 4452 { 4453 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 4454 if (vect_load_lanes_supported (vectype, group_size)) 4455 load_lanes_p = true; 4456 else if (!vect_grouped_load_supported (vectype, group_size)) 4457 return false; 4458 } 4459 4460 /* Invalidate assumptions made by dependence analysis when vectorization 4461 on the unrolled body effectively re-orders stmts. */ 4462 if (!PURE_SLP_STMT (stmt_info) 4463 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 4464 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo) 4465 > STMT_VINFO_MIN_NEG_DIST (stmt_info))) 4466 { 4467 if (dump_enabled_p ()) 4468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4469 "cannot perform implicit CSE when performing " 4470 "group loads with negative dependence distance\n"); 4471 return false; 4472 } 4473 } 4474 4475 4476 if (STMT_VINFO_GATHER_P (stmt_info)) 4477 { 4478 gimple def_stmt; 4479 tree def; 4480 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base, 4481 &gather_off, &gather_scale); 4482 gcc_assert (gather_decl); 4483 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo, 4484 &def_stmt, &def, &gather_dt, 4485 &gather_off_vectype)) 4486 { 4487 if (dump_enabled_p ()) 4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4489 "gather index use not simple."); 4490 return false; 4491 } 4492 } 4493 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) 4494 ; 4495 else 4496 { 4497 negative = tree_int_cst_compare (nested_in_vect_loop 4498 ? 
STMT_VINFO_DR_STEP (stmt_info) 4499 : DR_STEP (dr), 4500 size_zero_node) < 0; 4501 if (negative && ncopies > 1) 4502 { 4503 if (dump_enabled_p ()) 4504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4505 "multiple types with negative step."); 4506 return false; 4507 } 4508 4509 if (negative) 4510 { 4511 gcc_assert (!grouped_load); 4512 alignment_support_scheme = vect_supportable_dr_alignment (dr, false); 4513 if (alignment_support_scheme != dr_aligned 4514 && alignment_support_scheme != dr_unaligned_supported) 4515 { 4516 if (dump_enabled_p ()) 4517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4518 "negative step but alignment required."); 4519 return false; 4520 } 4521 if (!perm_mask_for_reverse (vectype)) 4522 { 4523 if (dump_enabled_p ()) 4524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4525 "negative step and reversing not supported."); 4526 return false; 4527 } 4528 } 4529 } 4530 4531 if (!vec_stmt) /* transformation not required. */ 4532 { 4533 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 4534 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL); 4535 return true; 4536 } 4537 4538 if (dump_enabled_p ()) 4539 dump_printf_loc (MSG_NOTE, vect_location, 4540 "transform load. ncopies = %d", ncopies); 4541 4542 /** Transform. **/ 4543 4544 if (STMT_VINFO_GATHER_P (stmt_info)) 4545 { 4546 tree vec_oprnd0 = NULL_TREE, op; 4547 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); 4548 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 4549 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE; 4550 edge pe = loop_preheader_edge (loop); 4551 gimple_seq seq; 4552 basic_block new_bb; 4553 enum { NARROW, NONE, WIDEN } modifier; 4554 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype); 4555 4556 if (nunits == gather_off_nunits) 4557 modifier = NONE; 4558 else if (nunits == gather_off_nunits / 2) 4559 { 4560 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); 4561 modifier = WIDEN; 4562 4563 for (i = 0; i < gather_off_nunits; ++i) 4564 sel[i] = i | nunits; 4565 4566 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel); 4567 gcc_assert (perm_mask != NULL_TREE); 4568 } 4569 else if (nunits == gather_off_nunits * 2) 4570 { 4571 unsigned char *sel = XALLOCAVEC (unsigned char, nunits); 4572 modifier = NARROW; 4573 4574 for (i = 0; i < nunits; ++i) 4575 sel[i] = i < gather_off_nunits 4576 ? i : i + nunits - gather_off_nunits; 4577 4578 perm_mask = vect_gen_perm_mask (vectype, sel); 4579 gcc_assert (perm_mask != NULL_TREE); 4580 ncopies *= 2; 4581 } 4582 else 4583 gcc_unreachable (); 4584 4585 rettype = TREE_TYPE (TREE_TYPE (gather_decl)); 4586 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4587 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4588 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4589 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4590 scaletype = TREE_VALUE (arglist); 4591 gcc_checking_assert (types_compatible_p (srctype, rettype) 4592 && types_compatible_p (srctype, masktype)); 4593 4594 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4595 4596 ptr = fold_convert (ptrtype, gather_base); 4597 if (!is_gimple_min_invariant (ptr)) 4598 { 4599 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 4600 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 4601 gcc_assert (!new_bb); 4602 } 4603 4604 /* Currently we support only unconditional gather loads, 4605 so mask should be all ones. 
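An all-ones mask element is built as integer -1 for integral mask types; for floating-point mask types the code below builds the REAL_VALUE_TYPE whose target bit pattern is all ones. (Illustrative target detail: x86 AVX2 gathers, for example, test only the most significant bit of each mask element, so such a vector enables every lane.)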
*/ 4606 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) 4607 mask = build_int_cst (TREE_TYPE (masktype), -1); 4608 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) 4609 { 4610 REAL_VALUE_TYPE r; 4611 long tmp[6]; 4612 for (j = 0; j < 6; ++j) 4613 tmp[j] = -1; 4614 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); 4615 mask = build_real (TREE_TYPE (masktype), r); 4616 } 4617 else 4618 gcc_unreachable (); 4619 mask = build_vector_from_val (masktype, mask); 4620 mask = vect_init_vector (stmt, mask, masktype, NULL); 4621 4622 scale = build_int_cst (scaletype, gather_scale); 4623 4624 prev_stmt_info = NULL; 4625 for (j = 0; j < ncopies; ++j) 4626 { 4627 if (modifier == WIDEN && (j & 1)) 4628 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, 4629 perm_mask, stmt, gsi); 4630 else if (j == 0) 4631 op = vec_oprnd0 4632 = vect_get_vec_def_for_operand (gather_off, stmt, NULL); 4633 else 4634 op = vec_oprnd0 4635 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0); 4636 4637 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 4638 { 4639 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)) 4640 == TYPE_VECTOR_SUBPARTS (idxtype)); 4641 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL); 4642 var = make_ssa_name (var, NULL); 4643 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 4644 new_stmt 4645 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, 4646 op, NULL_TREE); 4647 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4648 op = var; 4649 } 4650 4651 new_stmt 4652 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale); 4653 4654 if (!useless_type_conversion_p (vectype, rettype)) 4655 { 4656 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype) 4657 == TYPE_VECTOR_SUBPARTS (rettype)); 4658 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL); 4659 op = make_ssa_name (var, new_stmt); 4660 gimple_call_set_lhs (new_stmt, op); 4661 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4662 var = make_ssa_name (vec_dest, NULL); 4663 op = build1 (VIEW_CONVERT_EXPR, vectype, op); 4664 new_stmt 4665 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op, 4666 NULL_TREE); 4667 } 4668 else 4669 { 4670 var = make_ssa_name (vec_dest, new_stmt); 4671 gimple_call_set_lhs (new_stmt, var); 4672 } 4673 4674 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4675 4676 if (modifier == NARROW) 4677 { 4678 if ((j & 1) == 0) 4679 { 4680 prev_res = var; 4681 continue; 4682 } 4683 var = permute_vec_elements (prev_res, var, 4684 perm_mask, stmt, gsi); 4685 new_stmt = SSA_NAME_DEF_STMT (var); 4686 } 4687 4688 if (prev_stmt_info == NULL) 4689 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4690 else 4691 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4692 prev_stmt_info = vinfo_for_stmt (new_stmt); 4693 } 4694 return true; 4695 } 4696 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) 4697 { 4698 gimple_stmt_iterator incr_gsi; 4699 bool insert_after; 4700 gimple incr; 4701 tree offvar; 4702 tree ivstep; 4703 tree running_off; 4704 vec<constructor_elt, va_gc> *v = NULL; 4705 gimple_seq stmts = NULL; 4706 tree stride_base, stride_step, alias_off; 4707 4708 gcc_assert (!nested_in_vect_loop); 4709 4710 stride_base 4711 = fold_build_pointer_plus 4712 (unshare_expr (DR_BASE_ADDRESS (dr)), 4713 size_binop (PLUS_EXPR, 4714 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))), 4715 convert_to_ptrofftype (DR_INIT(dr)))); 4716 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr))); 4717 4718 /* For a load with loop-invariant (but other than power-of-2) 4719 stride 
(i.e. not a grouped access) like so: 4720 4721 for (i = 0; i < n; i += stride) 4722 ... = array[i]; 4723 4724 we generate a new induction variable and new accesses to 4725 form a new vector (or vectors, depending on ncopies): 4726 4727 for (j = 0; ; j += VF*stride) 4728 tmp1 = array[j]; 4729 tmp2 = array[j + stride]; 4730 ... 4731 vectemp = {tmp1, tmp2, ...} 4732 */ 4733 4734 ivstep = stride_step; 4735 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, 4736 build_int_cst (TREE_TYPE (ivstep), vf)); 4737 4738 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 4739 4740 create_iv (stride_base, ivstep, NULL, 4741 loop, &incr_gsi, insert_after, 4742 &offvar, NULL); 4743 incr = gsi_stmt (incr_gsi); 4744 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); 4745 4746 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE); 4747 if (stmts) 4748 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 4749 4750 prev_stmt_info = NULL; 4751 running_off = offvar; 4752 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0); 4753 for (j = 0; j < ncopies; j++) 4754 { 4755 tree vec_inv; 4756 4757 vec_alloc (v, nunits); 4758 for (i = 0; i < nunits; i++) 4759 { 4760 tree newref, newoff; 4761 gimple incr; 4762 newref = build2 (MEM_REF, TREE_TYPE (vectype), 4763 running_off, alias_off); 4764 4765 newref = force_gimple_operand_gsi (gsi, newref, true, 4766 NULL_TREE, true, 4767 GSI_SAME_STMT); 4768 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref); 4769 newoff = copy_ssa_name (running_off, NULL); 4770 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff, 4771 running_off, stride_step); 4772 vect_finish_stmt_generation (stmt, incr, gsi); 4773 4774 running_off = newoff; 4775 } 4776 4777 vec_inv = build_constructor (vectype, v); 4778 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); 4779 new_stmt = SSA_NAME_DEF_STMT (new_temp); 4780 4781 if (j == 0) 4782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4783 else 4784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4785 prev_stmt_info = vinfo_for_stmt (new_stmt); 4786 } 4787 return true; 4788 } 4789 4790 if (grouped_load) 4791 { 4792 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 4793 if (slp 4794 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists () 4795 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0]) 4796 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 4797 4798 /* Check if the chain of loads is already vectorized. */ 4799 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) 4800 { 4801 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 4802 return true; 4803 } 4804 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 4805 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 4806 4807 /* VEC_NUM is the number of vect stmts to be created for this group. */ 4808 if (slp) 4809 { 4810 grouped_load = false; 4811 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 4812 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()) 4813 slp_perm = true; 4814 } 4815 else 4816 vec_num = group_size; 4817 } 4818 else 4819 { 4820 first_stmt = stmt; 4821 first_dr = dr; 4822 group_size = vec_num = 1; 4823 } 4824 4825 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 4826 gcc_assert (alignment_support_scheme); 4827 /* Targets with load-lane instructions must not require explicit 4828 realignment. 
*/ 4829 gcc_assert (!load_lanes_p 4830 || alignment_support_scheme == dr_aligned 4831 || alignment_support_scheme == dr_unaligned_supported); 4832 4833 /* In case the vectorization factor (VF) is bigger than the number 4834 of elements that we can fit in a vectype (nunits), we have to generate 4835 more than one vector stmt - i.e - we need to "unroll" the 4836 vector stmt by a factor VF/nunits. In doing so, we record a pointer 4837 from one copy of the vector stmt to the next, in the field 4838 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 4839 stages to find the correct vector defs to be used when vectorizing 4840 stmts that use the defs of the current stmt. The example below 4841 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we 4842 need to create 4 vectorized stmts): 4843 4844 before vectorization: 4845 RELATED_STMT VEC_STMT 4846 S1: x = memref - - 4847 S2: z = x + 1 - - 4848 4849 step 1: vectorize stmt S1: 4850 We first create the vector stmt VS1_0, and, as usual, record a 4851 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 4852 Next, we create the vector stmt VS1_1, and record a pointer to 4853 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 4854 Similarly, for VS1_2 and VS1_3. This is the resulting chain of 4855 stmts and pointers: 4856 RELATED_STMT VEC_STMT 4857 VS1_0: vx0 = memref0 VS1_1 - 4858 VS1_1: vx1 = memref1 VS1_2 - 4859 VS1_2: vx2 = memref2 VS1_3 - 4860 VS1_3: vx3 = memref3 - - 4861 S1: x = load - VS1_0 4862 S2: z = x + 1 - - 4863 4864 See in documentation in vect_get_vec_def_for_stmt_copy for how the 4865 information we recorded in RELATED_STMT field is used to vectorize 4866 stmt S2. */ 4867 4868 /* In case of interleaving (non-unit grouped access): 4869 4870 S1: x2 = &base + 2 4871 S2: x0 = &base 4872 S3: x1 = &base + 1 4873 S4: x3 = &base + 3 4874 4875 Vectorized loads are created in the order of memory accesses 4876 starting from the access of the first stmt of the chain: 4877 4878 VS1: vx0 = &base 4879 VS2: vx1 = &base + vec_size*1 4880 VS3: vx3 = &base + vec_size*2 4881 VS4: vx4 = &base + vec_size*3 4882 4883 Then permutation statements are generated: 4884 4885 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } > 4886 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } > 4887 ... 4888 4889 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 4890 (the order of the data-refs in the output of vect_permute_load_chain 4891 corresponds to the order of scalar stmts in the interleaving chain - see 4892 the documentation of vect_permute_load_chain()). 4893 The generation of permutation stmts and recording them in 4894 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). 4895 4896 In case of both multiple types and interleaving, the vector loads and 4897 permutation stmts above are created for every copy. The result vector 4898 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the 4899 corresponding STMT_VINFO_RELATED_STMT for the next copies. 
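Concretely, assuming nunits == 8 and a chain of two loads, the two masks above are { 0, 2, 4, 6, 8, 10, 12, 14 } (extract the even elements) and { 1, 3, 5, 7, 9, 11, 13, 15 } (extract the odd elements).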
*/ 4900 4901 /* If the data reference is aligned (dr_aligned) or potentially unaligned 4902 on a target that supports unaligned accesses (dr_unaligned_supported) 4903 we generate the following code: 4904 p = initial_addr; 4905 indx = 0; 4906 loop { 4907 p = p + indx * vectype_size; 4908 vec_dest = *(p); 4909 indx = indx + 1; 4910 } 4911 4912 Otherwise, the data reference is potentially unaligned on a target that 4913 does not support unaligned accesses (dr_explicit_realign_optimized) - 4914 then generate the following code, in which the data in each iteration is 4915 obtained by two vector loads, one from the previous iteration, and one 4916 from the current iteration: 4917 p1 = initial_addr; 4918 msq_init = *(floor(p1)) 4919 p2 = initial_addr + VS - 1; 4920 realignment_token = call target_builtin; 4921 indx = 0; 4922 loop { 4923 p2 = p2 + indx * vectype_size 4924 lsq = *(floor(p2)) 4925 vec_dest = realign_load (msq, lsq, realignment_token) 4926 indx = indx + 1; 4927 msq = lsq; 4928 } */ 4929 4930 /* If the misalignment remains the same throughout the execution of the 4931 loop, we can create the init_addr and permutation mask at the loop 4932 preheader. Otherwise, it needs to be created inside the loop. 4933 This can only occur when vectorizing memory accesses in the inner-loop 4934 nested within an outer-loop that is being vectorized. */ 4935 4936 if (nested_in_vect_loop 4937 && (TREE_INT_CST_LOW (DR_STEP (dr)) 4938 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)) 4939 { 4940 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 4941 compute_in_loop = true; 4942 } 4943 4944 if ((alignment_support_scheme == dr_explicit_realign_optimized 4945 || alignment_support_scheme == dr_explicit_realign) 4946 && !compute_in_loop) 4947 { 4948 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token, 4949 alignment_support_scheme, NULL_TREE, 4950 &at_loop); 4951 if (alignment_support_scheme == dr_explicit_realign_optimized) 4952 { 4953 phi = SSA_NAME_DEF_STMT (msq); 4954 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), 4955 size_one_node); 4956 } 4957 } 4958 else 4959 at_loop = loop; 4960 4961 if (negative) 4962 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 4963 4964 if (load_lanes_p) 4965 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 4966 else 4967 aggr_type = vectype; 4968 4969 prev_stmt_info = NULL; 4970 for (j = 0; j < ncopies; j++) 4971 { 4972 /* 1. Create the vector or array pointer update chain. */ 4973 if (j == 0) 4974 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop, 4975 offset, &dummy, gsi, 4976 &ptr_incr, false, &inv_p, 4977 byte_offset); 4978 else 4979 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 4980 TYPE_SIZE_UNIT (aggr_type)); 4981 4982 if (grouped_load || slp_perm) 4983 dr_chain.create (vec_num); 4984 4985 if (load_lanes_p) 4986 { 4987 tree vec_array; 4988 4989 vec_array = create_vector_array (vectype, vec_num); 4990 4991 /* Emit: 4992 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ 4993 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); 4994 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); 4995 gimple_call_set_lhs (new_stmt, vec_array); 4996 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4997 4998 /* Extract each vector into an SSA_NAME. 
*/ 4999 for (i = 0; i < vec_num; i++) 5000 { 5001 new_temp = read_vector_array (stmt, gsi, scalar_dest, 5002 vec_array, i); 5003 dr_chain.quick_push (new_temp); 5004 } 5005 5006 /* Record the mapping between SSA_NAMEs and statements. */ 5007 vect_record_grouped_load_vectors (stmt, dr_chain); 5008 } 5009 else 5010 { 5011 for (i = 0; i < vec_num; i++) 5012 { 5013 if (i > 0) 5014 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 5015 stmt, NULL_TREE); 5016 5017 /* 2. Create the vector-load in the loop. */ 5018 switch (alignment_support_scheme) 5019 { 5020 case dr_aligned: 5021 case dr_unaligned_supported: 5022 { 5023 unsigned int align, misalign; 5024 5025 data_ref 5026 = build2 (MEM_REF, vectype, dataref_ptr, 5027 build_int_cst (reference_alias_ptr_type 5028 (DR_REF (first_dr)), 0)); 5029 align = TYPE_ALIGN_UNIT (vectype); 5030 if (alignment_support_scheme == dr_aligned) 5031 { 5032 gcc_assert (aligned_access_p (first_dr)); 5033 misalign = 0; 5034 } 5035 else if (DR_MISALIGNMENT (first_dr) == -1) 5036 { 5037 TREE_TYPE (data_ref) 5038 = build_aligned_type (TREE_TYPE (data_ref), 5039 TYPE_ALIGN (elem_type)); 5040 align = TYPE_ALIGN_UNIT (elem_type); 5041 misalign = 0; 5042 } 5043 else 5044 { 5045 TREE_TYPE (data_ref) 5046 = build_aligned_type (TREE_TYPE (data_ref), 5047 TYPE_ALIGN (elem_type)); 5048 misalign = DR_MISALIGNMENT (first_dr); 5049 } 5050 set_ptr_info_alignment (get_ptr_info (dataref_ptr), 5051 align, misalign); 5052 break; 5053 } 5054 case dr_explicit_realign: 5055 { 5056 tree ptr, bump; 5057 tree vs_minus_1; 5058 5059 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 5060 5061 if (compute_in_loop) 5062 msq = vect_setup_realignment (first_stmt, gsi, 5063 &realignment_token, 5064 dr_explicit_realign, 5065 dataref_ptr, NULL); 5066 5067 ptr = copy_ssa_name (dataref_ptr, NULL); 5068 new_stmt = gimple_build_assign_with_ops 5069 (BIT_AND_EXPR, ptr, dataref_ptr, 5070 build_int_cst 5071 (TREE_TYPE (dataref_ptr), 5072 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 5073 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5074 data_ref 5075 = build2 (MEM_REF, vectype, ptr, 5076 build_int_cst (reference_alias_ptr_type 5077 (DR_REF (first_dr)), 0)); 5078 vec_dest = vect_create_destination_var (scalar_dest, 5079 vectype); 5080 new_stmt = gimple_build_assign (vec_dest, data_ref); 5081 new_temp = make_ssa_name (vec_dest, new_stmt); 5082 gimple_assign_set_lhs (new_stmt, new_temp); 5083 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 5084 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 5085 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5086 msq = new_temp; 5087 5088 bump = size_binop (MULT_EXPR, vs_minus_1, 5089 TYPE_SIZE_UNIT (elem_type)); 5090 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); 5091 new_stmt = gimple_build_assign_with_ops 5092 (BIT_AND_EXPR, NULL_TREE, ptr, 5093 build_int_cst 5094 (TREE_TYPE (ptr), 5095 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 5096 ptr = copy_ssa_name (dataref_ptr, new_stmt); 5097 gimple_assign_set_lhs (new_stmt, ptr); 5098 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5099 data_ref 5100 = build2 (MEM_REF, vectype, ptr, 5101 build_int_cst (reference_alias_ptr_type 5102 (DR_REF (first_dr)), 0)); 5103 break; 5104 } 5105 case dr_explicit_realign_optimized: 5106 new_temp = copy_ssa_name (dataref_ptr, NULL); 5107 new_stmt = gimple_build_assign_with_ops 5108 (BIT_AND_EXPR, new_temp, dataref_ptr, 5109 build_int_cst 5110 (TREE_TYPE (dataref_ptr), 5111 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 5112 vect_finish_stmt_generation (stmt, 
new_stmt, gsi); 5113 data_ref 5114 = build2 (MEM_REF, vectype, new_temp, 5115 build_int_cst (reference_alias_ptr_type 5116 (DR_REF (first_dr)), 0)); 5117 break; 5118 default: 5119 gcc_unreachable (); 5120 } 5121 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5122 new_stmt = gimple_build_assign (vec_dest, data_ref); 5123 new_temp = make_ssa_name (vec_dest, new_stmt); 5124 gimple_assign_set_lhs (new_stmt, new_temp); 5125 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5126 5127 /* 3. Handle explicit realignment if necessary/supported. 5128 Create in loop: 5129 vec_dest = realign_load (msq, lsq, realignment_token) */ 5130 if (alignment_support_scheme == dr_explicit_realign_optimized 5131 || alignment_support_scheme == dr_explicit_realign) 5132 { 5133 lsq = gimple_assign_lhs (new_stmt); 5134 if (!realignment_token) 5135 realignment_token = dataref_ptr; 5136 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5137 new_stmt 5138 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR, 5139 vec_dest, msq, lsq, 5140 realignment_token); 5141 new_temp = make_ssa_name (vec_dest, new_stmt); 5142 gimple_assign_set_lhs (new_stmt, new_temp); 5143 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5144 5145 if (alignment_support_scheme == dr_explicit_realign_optimized) 5146 { 5147 gcc_assert (phi); 5148 if (i == vec_num - 1 && j == ncopies - 1) 5149 add_phi_arg (phi, lsq, 5150 loop_latch_edge (containing_loop), 5151 UNKNOWN_LOCATION); 5152 msq = lsq; 5153 } 5154 } 5155 5156 /* 4. Handle invariant-load. */ 5157 if (inv_p && !bb_vinfo) 5158 { 5159 gimple_stmt_iterator gsi2 = *gsi; 5160 gcc_assert (!grouped_load); 5161 gsi_next (&gsi2); 5162 new_temp = vect_init_vector (stmt, scalar_dest, 5163 vectype, &gsi2); 5164 new_stmt = SSA_NAME_DEF_STMT (new_temp); 5165 } 5166 5167 if (negative) 5168 { 5169 tree perm_mask = perm_mask_for_reverse (vectype); 5170 new_temp = permute_vec_elements (new_temp, new_temp, 5171 perm_mask, stmt, gsi); 5172 new_stmt = SSA_NAME_DEF_STMT (new_temp); 5173 } 5174 5175 /* Collect vector loads and later create their permutation in 5176 vect_transform_grouped_load (). */ 5177 if (grouped_load || slp_perm) 5178 dr_chain.quick_push (new_temp); 5179 5180 /* Store vector loads in the corresponding SLP_NODE. */ 5181 if (slp && !slp_perm) 5182 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5183 } 5184 } 5185 5186 if (slp && !slp_perm) 5187 continue; 5188 5189 if (slp_perm) 5190 { 5191 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf, 5192 slp_node_instance, false)) 5193 { 5194 dr_chain.release (); 5195 return false; 5196 } 5197 } 5198 else 5199 { 5200 if (grouped_load) 5201 { 5202 if (!load_lanes_p) 5203 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi); 5204 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 5205 } 5206 else 5207 { 5208 if (j == 0) 5209 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 5210 else 5211 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 5212 prev_stmt_info = vinfo_for_stmt (new_stmt); 5213 } 5214 } 5215 dr_chain.release (); 5216 } 5217 5218 return true; 5219} 5220 5221/* Function vect_is_simple_cond. 5222 5223 Input: 5224 LOOP - the loop that is being vectorized. 5225 COND - Condition that is checked for simple use. 5226 5227 Output: 5228 *COMP_VECTYPE - the vector type for the comparison. 5229 5230 Returns whether a COND can be vectorized. Checks whether 5231 condition operands are supportable using vect_is_simple_use.
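For instance (an assumed example), in x_1 = a_2 < b_3 ? c_4 : d_5 the condition a_2 < b_3 is simple when a_2 and b_3 are SSA names with supportable defs; invariant INTEGER_CST, REAL_CST or FIXED_CST operands are accepted as well, and *COMP_VECTYPE is taken from whichever operand is an SSA name.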
*/ 5233 static bool 5234 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo, 5235 bb_vec_info bb_vinfo, tree *comp_vectype) 5236 { 5237 tree lhs, rhs; 5238 tree def; 5239 enum vect_def_type dt; 5240 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 5241 5242 if (!COMPARISON_CLASS_P (cond)) 5243 return false; 5244 5245 lhs = TREE_OPERAND (cond, 0); 5246 rhs = TREE_OPERAND (cond, 1); 5247 5248 if (TREE_CODE (lhs) == SSA_NAME) 5249 { 5250 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 5251 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo, 5252 &lhs_def_stmt, &def, &dt, &vectype1)) 5253 return false; 5254 } 5255 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST 5256 && TREE_CODE (lhs) != FIXED_CST) 5257 return false; 5258 5259 if (TREE_CODE (rhs) == SSA_NAME) 5260 { 5261 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 5262 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo, 5263 &rhs_def_stmt, &def, &dt, &vectype2)) 5264 return false; 5265 } 5266 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST 5267 && TREE_CODE (rhs) != FIXED_CST) 5268 return false; 5269 5270 *comp_vectype = vectype1 ? vectype1 : vectype2; 5271 return true; 5272} 5273 5274/* vectorizable_condition. 5275 5276 Check if STMT is a conditional modify expression that can be vectorized. 5277 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 5278 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 5279 at GSI. 5280 5281 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable 5282 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in 5283 the else clause if it is 2). 5284 5285 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 5286 5287bool 5288vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, 5289 gimple *vec_stmt, tree reduc_def, int reduc_index, 5290 slp_tree slp_node) 5291{ 5292 tree scalar_dest = NULL_TREE; 5293 tree vec_dest = NULL_TREE; 5294 tree cond_expr, then_clause, else_clause; 5295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5296 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5297 tree comp_vectype = NULL_TREE; 5298 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; 5299 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; 5300 tree vec_compare, vec_cond_expr; 5301 tree new_temp; 5302 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5303 tree def; 5304 enum vect_def_type dt, dts[4]; 5305 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 5306 int ncopies; 5307 enum tree_code code; 5308 stmt_vec_info prev_stmt_info = NULL; 5309 int i, j; 5310 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5311 vec<tree> vec_oprnds0 = vNULL; 5312 vec<tree> vec_oprnds1 = vNULL; 5313 vec<tree> vec_oprnds2 = vNULL; 5314 vec<tree> vec_oprnds3 = vNULL; 5315 tree vec_cmp_type = vectype; 5316 5317 if (slp_node || PURE_SLP_STMT (stmt_info)) 5318 ncopies = 1; 5319 else 5320 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 5321 5322 gcc_assert (ncopies >= 1); 5323 if (reduc_index && ncopies > 1) 5324 return false; /* FORNOW */ 5325 5326 if (reduc_index && STMT_SLP_TYPE (stmt_info)) 5327 return false; 5328 5329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5330 return false; 5331 5332 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5333 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle 5334 && reduc_def)) 5335 return false; 5336 5337 /* FORNOW: not yet supported.
*/ 5338 if (STMT_VINFO_LIVE_P (stmt_info)) 5339 { 5340 if (dump_enabled_p ()) 5341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5342 "value used after loop."); 5343 return false; 5344 } 5345 5346 /* Is vectorizable conditional operation? */ 5347 if (!is_gimple_assign (stmt)) 5348 return false; 5349 5350 code = gimple_assign_rhs_code (stmt); 5351 5352 if (code != COND_EXPR) 5353 return false; 5354 5355 cond_expr = gimple_assign_rhs1 (stmt); 5356 then_clause = gimple_assign_rhs2 (stmt); 5357 else_clause = gimple_assign_rhs3 (stmt); 5358 5359 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo, 5360 &comp_vectype) 5361 || !comp_vectype) 5362 return false; 5363 5364 if (TREE_CODE (then_clause) == SSA_NAME) 5365 { 5366 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); 5367 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo, 5368 &then_def_stmt, &def, &dt)) 5369 return false; 5370 } 5371 else if (TREE_CODE (then_clause) != INTEGER_CST 5372 && TREE_CODE (then_clause) != REAL_CST 5373 && TREE_CODE (then_clause) != FIXED_CST) 5374 return false; 5375 5376 if (TREE_CODE (else_clause) == SSA_NAME) 5377 { 5378 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); 5379 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo, 5380 &else_def_stmt, &def, &dt)) 5381 return false; 5382 } 5383 else if (TREE_CODE (else_clause) != INTEGER_CST 5384 && TREE_CODE (else_clause) != REAL_CST 5385 && TREE_CODE (else_clause) != FIXED_CST) 5386 return false; 5387 5388 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))) 5389 { 5390 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))); 5391 tree cmp_type = build_nonstandard_integer_type (prec, 1); 5392 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype); 5393 if (vec_cmp_type == NULL_TREE) 5394 return false; 5395 } 5396 5397 if (!vec_stmt) 5398 { 5399 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 5400 return expand_vec_cond_expr_p (vectype, comp_vectype); 5401 } 5402 5403 /* Transform. */ 5404 5405 if (!slp_node) 5406 { 5407 vec_oprnds0.create (1); 5408 vec_oprnds1.create (1); 5409 vec_oprnds2.create (1); 5410 vec_oprnds3.create (1); 5411 } 5412 5413 /* Handle def. */ 5414 scalar_dest = gimple_assign_lhs (stmt); 5415 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5416 5417 /* Handle cond expr. 
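For each of the NCOPIES copies we obtain vector defs for the four operands (the two comparison operands, the then clause and the else clause), either from the scalar defs (j == 0) or from the defs of the previous copy, and emit one VEC_COND_EXPR <compare, then, else> per vector.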
*/ 5418 for (j = 0; j < ncopies; j++) 5419 { 5420 gimple new_stmt = NULL; 5421 if (j == 0) 5422 { 5423 if (slp_node) 5424 { 5425 vec<tree> ops; 5426 ops.create (4); 5427 vec<vec<tree> > vec_defs; 5428 5429 vec_defs.create (4); 5430 ops.safe_push (TREE_OPERAND (cond_expr, 0)); 5431 ops.safe_push (TREE_OPERAND (cond_expr, 1)); 5432 ops.safe_push (then_clause); 5433 ops.safe_push (else_clause); 5434 vect_get_slp_defs (ops, slp_node, &vec_defs, -1); 5435 vec_oprnds3 = vec_defs.pop (); 5436 vec_oprnds2 = vec_defs.pop (); 5437 vec_oprnds1 = vec_defs.pop (); 5438 vec_oprnds0 = vec_defs.pop (); 5439 5440 ops.release (); 5441 vec_defs.release (); 5442 } 5443 else 5444 { 5445 gimple gtemp; 5446 vec_cond_lhs = 5447 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), 5448 stmt, NULL); 5449 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt, 5450 loop_vinfo, NULL, >emp, &def, &dts[0]); 5451 5452 vec_cond_rhs = 5453 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), 5454 stmt, NULL); 5455 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt, 5456 loop_vinfo, NULL, >emp, &def, &dts[1]); 5457 if (reduc_index == 1) 5458 vec_then_clause = reduc_def; 5459 else 5460 { 5461 vec_then_clause = vect_get_vec_def_for_operand (then_clause, 5462 stmt, NULL); 5463 vect_is_simple_use (then_clause, stmt, loop_vinfo, 5464 NULL, >emp, &def, &dts[2]); 5465 } 5466 if (reduc_index == 2) 5467 vec_else_clause = reduc_def; 5468 else 5469 { 5470 vec_else_clause = vect_get_vec_def_for_operand (else_clause, 5471 stmt, NULL); 5472 vect_is_simple_use (else_clause, stmt, loop_vinfo, 5473 NULL, >emp, &def, &dts[3]); 5474 } 5475 } 5476 } 5477 else 5478 { 5479 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], 5480 vec_oprnds0.pop ()); 5481 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], 5482 vec_oprnds1.pop ()); 5483 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], 5484 vec_oprnds2.pop ()); 5485 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], 5486 vec_oprnds3.pop ()); 5487 } 5488 5489 if (!slp_node) 5490 { 5491 vec_oprnds0.quick_push (vec_cond_lhs); 5492 vec_oprnds1.quick_push (vec_cond_rhs); 5493 vec_oprnds2.quick_push (vec_then_clause); 5494 vec_oprnds3.quick_push (vec_else_clause); 5495 } 5496 5497 /* Arguments are ready. Create the new vector stmt. */ 5498 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs) 5499 { 5500 vec_cond_rhs = vec_oprnds1[i]; 5501 vec_then_clause = vec_oprnds2[i]; 5502 vec_else_clause = vec_oprnds3[i]; 5503 5504 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type, 5505 vec_cond_lhs, vec_cond_rhs); 5506 vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 5507 vec_compare, vec_then_clause, vec_else_clause); 5508 5509 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); 5510 new_temp = make_ssa_name (vec_dest, new_stmt); 5511 gimple_assign_set_lhs (new_stmt, new_temp); 5512 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5513 if (slp_node) 5514 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5515 } 5516 5517 if (slp_node) 5518 continue; 5519 5520 if (j == 0) 5521 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 5522 else 5523 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 5524 5525 prev_stmt_info = vinfo_for_stmt (new_stmt); 5526 } 5527 5528 vec_oprnds0.release (); 5529 vec_oprnds1.release (); 5530 vec_oprnds2.release (); 5531 vec_oprnds3.release (); 5532 5533 return true; 5534} 5535 5536 5537/* Make sure the statement is vectorizable. 
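This is done by dispatching STMT to the vectorizable_* routines below in analysis mode (with a NULL VEC_STMT); whichever routine accepts the stmt records the matching STMT_VINFO_TYPE for the transformation phase.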
*/
5538
5539 bool
5540 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5541 {
5542 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5543 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5544 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5545 bool ok;
5546 tree scalar_type, vectype;
5547 gimple pattern_stmt;
5548 gimple_seq pattern_def_seq;
5549
5550 if (dump_enabled_p ())
5551 {
5552 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5553 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5554 }
5555
5556 if (gimple_has_volatile_ops (stmt))
5557 {
5558 if (dump_enabled_p ())
5559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5560 "not vectorized: stmt has volatile operands");
5561
5562 return false;
5563 }
5564
5565 /* Skip stmts that do not need to be vectorized. In loops this is expected
5566 to include:
5567 - the COND_EXPR which is the loop exit condition
5568 - any LABEL_EXPRs in the loop
5569 - computations that are used only for array indexing or loop control.
5570 In basic blocks we only analyze statements that are a part of some SLP
5571 instance, therefore, all the statements are relevant.
5572
5573 A pattern statement needs to be analyzed instead of the original statement
5574 if the original statement is not relevant. Otherwise, we analyze both
5575 statements. In basic blocks we are called from some SLP instance
5576 traversal; there we don't analyze pattern stmts separately, since the
5577 pattern stmts are already part of the SLP instance. */
5578
5579 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5580 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5581 && !STMT_VINFO_LIVE_P (stmt_info))
5582 {
5583 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5584 && pattern_stmt
5585 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5586 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5587 {
5588 /* Analyze PATTERN_STMT instead of the original stmt. */
5589 stmt = pattern_stmt;
5590 stmt_info = vinfo_for_stmt (pattern_stmt);
5591 if (dump_enabled_p ())
5592 {
5593 dump_printf_loc (MSG_NOTE, vect_location,
5594 "==> examining pattern statement: ");
5595 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5596 }
5597 }
5598 else
5599 {
5600 if (dump_enabled_p ())
5601 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5602
5603 return true;
5604 }
5605 }
5606 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5607 && node == NULL
5608 && pattern_stmt
5609 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5610 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5611 {
5612 /* Analyze PATTERN_STMT too. */
5613 if (dump_enabled_p ())
5614 {
5615 dump_printf_loc (MSG_NOTE, vect_location,
5616 "==> examining pattern statement: ");
5617 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5618 }
5619
5620 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5621 return false;
5622 }
5623
5624 if (is_pattern_stmt_p (stmt_info)
5625 && node == NULL
5626 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5627 {
5628 gimple_stmt_iterator si;
5629
5630 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5631 {
5632 gimple pattern_def_stmt = gsi_stmt (si);
5633 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5634 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5635 {
5636 /* Analyze def stmt of STMT if it's a pattern stmt.
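   For example (illustrative), a widening-multiplication pattern may add a
   def sequence stmt that promotes one operand to the wider type; such a
   stmt is relevant only through the pattern stmt and gets analyzed here.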
*/ 5637 if (dump_enabled_p ()) 5638 { 5639 dump_printf_loc (MSG_NOTE, vect_location, 5640 "==> examining pattern def statement: "); 5641 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0); 5642 } 5643 5644 if (!vect_analyze_stmt (pattern_def_stmt, 5645 need_to_vectorize, node)) 5646 return false; 5647 } 5648 } 5649 } 5650 5651 switch (STMT_VINFO_DEF_TYPE (stmt_info)) 5652 { 5653 case vect_internal_def: 5654 break; 5655 5656 case vect_reduction_def: 5657 case vect_nested_cycle: 5658 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer 5659 || relevance == vect_used_in_outer_by_reduction 5660 || relevance == vect_unused_in_scope)); 5661 break; 5662 5663 case vect_induction_def: 5664 case vect_constant_def: 5665 case vect_external_def: 5666 case vect_unknown_def_type: 5667 default: 5668 gcc_unreachable (); 5669 } 5670 5671 if (bb_vinfo) 5672 { 5673 gcc_assert (PURE_SLP_STMT (stmt_info)); 5674 5675 scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); 5676 if (dump_enabled_p ()) 5677 { 5678 dump_printf_loc (MSG_NOTE, vect_location, 5679 "get vectype for scalar type: "); 5680 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); 5681 } 5682 5683 vectype = get_vectype_for_scalar_type (scalar_type); 5684 if (!vectype) 5685 { 5686 if (dump_enabled_p ()) 5687 { 5688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5689 "not SLPed: unsupported data-type "); 5690 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 5691 scalar_type); 5692 } 5693 return false; 5694 } 5695 5696 if (dump_enabled_p ()) 5697 { 5698 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); 5699 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); 5700 } 5701 5702 STMT_VINFO_VECTYPE (stmt_info) = vectype; 5703 } 5704 5705 if (STMT_VINFO_RELEVANT_P (stmt_info)) 5706 { 5707 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); 5708 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); 5709 *need_to_vectorize = true; 5710 } 5711 5712 ok = true; 5713 if (!bb_vinfo 5714 && (STMT_VINFO_RELEVANT_P (stmt_info) 5715 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 5716 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL) 5717 || vectorizable_shift (stmt, NULL, NULL, NULL) 5718 || vectorizable_operation (stmt, NULL, NULL, NULL) 5719 || vectorizable_assignment (stmt, NULL, NULL, NULL) 5720 || vectorizable_load (stmt, NULL, NULL, NULL, NULL) 5721 || vectorizable_call (stmt, NULL, NULL, NULL) 5722 || vectorizable_store (stmt, NULL, NULL, NULL) 5723 || vectorizable_reduction (stmt, NULL, NULL, NULL) 5724 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); 5725 else 5726 { 5727 if (bb_vinfo) 5728 ok = (vectorizable_conversion (stmt, NULL, NULL, node) 5729 || vectorizable_shift (stmt, NULL, NULL, node) 5730 || vectorizable_operation (stmt, NULL, NULL, node) 5731 || vectorizable_assignment (stmt, NULL, NULL, node) 5732 || vectorizable_load (stmt, NULL, NULL, node, NULL) 5733 || vectorizable_call (stmt, NULL, NULL, node) 5734 || vectorizable_store (stmt, NULL, NULL, node) 5735 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); 5736 } 5737 5738 if (!ok) 5739 { 5740 if (dump_enabled_p ()) 5741 { 5742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5743 "not vectorized: relevant stmt not "); 5744 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: "); 5745 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); 5746 } 5747 5748 return false; 5749 } 5750 5751 if (bb_vinfo) 5752 return true; 5753 5754 /* Stmts that are (also) "live" (i.e. 
- that are used out of the loop)
5755 need extra handling, except for vectorizable reductions. */
5756 if (STMT_VINFO_LIVE_P (stmt_info)
5757 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5758 ok = vectorizable_live_operation (stmt, NULL, NULL);
5759
5760 if (!ok)
5761 {
5762 if (dump_enabled_p ())
5763 {
5764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5765 "not vectorized: live stmt not ");
5766 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5767 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5768 }
5769
5770 return false;
5771 }
5772
5773 return true;
5774}
5775
5776
5777/* Function vect_transform_stmt.
5778
5779 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5780
5781 bool
5782 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5783 bool *grouped_store, slp_tree slp_node,
5784 slp_instance slp_node_instance)
5785 {
5786 bool is_store = false;
5787 gimple vec_stmt = NULL;
5788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5789 bool done;
5790
5791 switch (STMT_VINFO_TYPE (stmt_info))
5792 {
5793 case type_demotion_vec_info_type:
5794 case type_promotion_vec_info_type:
5795 case type_conversion_vec_info_type:
5796 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5797 gcc_assert (done);
5798 break;
5799
5800 case induc_vec_info_type:
5801 gcc_assert (!slp_node);
5802 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5803 gcc_assert (done);
5804 break;
5805
5806 case shift_vec_info_type:
5807 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5808 gcc_assert (done);
5809 break;
5810
5811 case op_vec_info_type:
5812 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5813 gcc_assert (done);
5814 break;
5815
5816 case assignment_vec_info_type:
5817 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5818 gcc_assert (done);
5819 break;
5820
5821 case load_vec_info_type:
5822 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5823 slp_node_instance);
5824 gcc_assert (done);
5825 break;
5826
5827 case store_vec_info_type:
5828 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5829 gcc_assert (done);
5830 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5831 {
5832 /* In case of interleaving, the whole chain is vectorized when the
5833 last store in the chain is reached. Store stmts before the last
5834 one are skipped, and their vec_stmt_info shouldn't be freed
5835 meanwhile. */
5836 *grouped_store = true;
5837 if (STMT_VINFO_VEC_STMT (stmt_info))
5838 is_store = true;
5839 }
5840 else
5841 is_store = true;
5842 break;
5843
5844 case condition_vec_info_type:
5845 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5846 gcc_assert (done);
5847 break;
5848
5849 case call_vec_info_type:
5850 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5851 stmt = gsi_stmt (*gsi);
5852 break;
5853
5854 case reduc_vec_info_type:
5855 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5856 gcc_assert (done);
5857 break;
5858
5859 default:
5860 if (!STMT_VINFO_LIVE_P (stmt_info))
5861 {
5862 if (dump_enabled_p ())
5863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5864 "stmt not supported.");
5865 gcc_unreachable ();
5866 }
5867 }
5868
5869 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5870 is being vectorized, but outside the immediately enclosing loop.
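   For example (illustrative), when vectorizing the outer loop of
     for (i = 0; i < N; i++)
       {
         s = 0;
         for (j = 0; j < M; j++)
           s += a[i][j];
         b[i] = s;
       }
   the inner-loop def of s is used by the outer-loop stmt b[i] = s, so the
   vectorized def must be recorded at the inner-loop exit phi.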
*/
5871 if (vec_stmt
5872 && STMT_VINFO_LOOP_VINFO (stmt_info)
5873 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5874 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5875 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5876 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5877 || STMT_VINFO_RELEVANT (stmt_info) ==
5878 vect_used_in_outer_by_reduction))
5879 {
5880 struct loop *innerloop = LOOP_VINFO_LOOP (
5881 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5882 imm_use_iterator imm_iter;
5883 use_operand_p use_p;
5884 tree scalar_dest;
5885 gimple exit_phi;
5886
5887 if (dump_enabled_p ())
5888 dump_printf_loc (MSG_NOTE, vect_location,
5889 "Record the vdef for outer-loop vectorization.");
5890
5891 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5892 (to be used when vectorizing outer-loop stmts that use the DEF of
5893 STMT). */
5894 if (gimple_code (stmt) == GIMPLE_PHI)
5895 scalar_dest = PHI_RESULT (stmt);
5896 else
5897 scalar_dest = gimple_assign_lhs (stmt);
5898
5899 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5900 {
5901 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5902 {
5903 exit_phi = USE_STMT (use_p);
5904 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5905 }
5906 }
5907 }
5908
5909 /* Handle stmts whose DEF is used outside the loop-nest that is
5910 being vectorized. */
5911 if (STMT_VINFO_LIVE_P (stmt_info)
5912 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5913 {
5914 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5915 gcc_assert (done);
5916 }
5917
5918 if (vec_stmt)
5919 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5920
5921 return is_store;
5922}
5923
5924
5925/* Remove a group of stores (for SLP or interleaving), free their
5926 stmt_vec_info. */
5927
5928 void
5929 vect_remove_stores (gimple first_stmt)
5930 {
5931 gimple next = first_stmt;
5932 gimple tmp;
5933 gimple_stmt_iterator next_si;
5934
5935 while (next)
5936 {
5937 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5938
5939 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5940 if (is_pattern_stmt_p (stmt_info))
5941 next = STMT_VINFO_RELATED_STMT (stmt_info);
5942 /* Free the attached stmt_vec_info and remove the stmt. */
5943 next_si = gsi_for_stmt (next);
5944 unlink_stmt_vdef (next);
5945 gsi_remove (&next_si, true);
5946 release_defs (next);
5947 free_stmt_vec_info (next);
5948 next = tmp;
5949 }
5950}
5951
5952
5953/* Function new_stmt_vec_info.
5954
5955 Create and initialize a new stmt_vec_info struct for STMT.
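   Every field starts out cleared; e.g. (illustrative) the phi of an
   induction variable is created as vect_unknown_def_type below and is only
   later classified as vect_induction_def by the loop analysis.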
*/
5956
5957 stmt_vec_info
5958 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5959 bb_vec_info bb_vinfo)
5960 {
5961 stmt_vec_info res;
5962 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5963
5964 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5965 STMT_VINFO_STMT (res) = stmt;
5966 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5967 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5968 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5969 STMT_VINFO_LIVE_P (res) = false;
5970 STMT_VINFO_VECTYPE (res) = NULL;
5971 STMT_VINFO_VEC_STMT (res) = NULL;
5972 STMT_VINFO_VECTORIZABLE (res) = true;
5973 STMT_VINFO_IN_PATTERN_P (res) = false;
5974 STMT_VINFO_RELATED_STMT (res) = NULL;
5975 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5976 STMT_VINFO_DATA_REF (res) = NULL;
5977
5978 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5979 STMT_VINFO_DR_OFFSET (res) = NULL;
5980 STMT_VINFO_DR_INIT (res) = NULL;
5981 STMT_VINFO_DR_STEP (res) = NULL;
5982 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5983
5984 if (gimple_code (stmt) == GIMPLE_PHI
5985 && is_loop_header_bb_p (gimple_bb (stmt)))
5986 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5987 else
5988 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5989
5990 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
5991 STMT_SLP_TYPE (res) = loop_vect;
5992 GROUP_FIRST_ELEMENT (res) = NULL;
5993 GROUP_NEXT_ELEMENT (res) = NULL;
5994 GROUP_SIZE (res) = 0;
5995 GROUP_STORE_COUNT (res) = 0;
5996 GROUP_GAP (res) = 0;
5997 GROUP_SAME_DR_STMT (res) = NULL;
5998 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5999
6000 return res;
6001}
6002
6003
6004/* Create the vector for stmt_vec_info structs. */
6005
6006 void
6007 init_stmt_vec_info_vec (void)
6008 {
6009 gcc_assert (!stmt_vec_info_vec.exists ());
6010 stmt_vec_info_vec.create (50);
6011}
6012
6013
6014/* Free the vector of stmt_vec_info structs. */
6015
6016 void
6017 free_stmt_vec_info_vec (void)
6018 {
6019 unsigned int i;
6020 vec_void_p info;
6021 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6022 if (info != NULL)
6023 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6024 gcc_assert (stmt_vec_info_vec.exists ());
6025 stmt_vec_info_vec.release ();
6026}
6027
6028
6029/* Free stmt vectorization related info. */
6030
6031 void
6032 free_stmt_vec_info (gimple stmt)
6033 {
6034 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6035
6036 if (!stmt_info)
6037 return;
6038
6039 /* Check if this statement has a related "pattern stmt"
6040 (introduced by the vectorizer during the pattern recognition
6041 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6042 too. */
6043 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6044 {
6045 stmt_vec_info patt_info
6046 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6047 if (patt_info)
6048 {
6049 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6050 if (seq)
6051 {
6052 gimple_stmt_iterator si;
6053 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6054 free_stmt_vec_info (gsi_stmt (si));
6055 }
6056 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6057 }
6058 }
6059
6060 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6061 set_vinfo_for_stmt (stmt, NULL);
6062 free (stmt_info);
6063}
6064
6065
6066/* Function get_vectype_for_scalar_type_and_size.
6067
6068 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6069 by the target.
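   E.g. (illustrative), for SCALAR_TYPE int and SIZE 16 on a target that
   provides V4SImode this returns a 4-unit vector type, while with SIZE 0
   the target's preferred SIMD mode for SImode decides the number of units.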
*/
6070
6071 static tree
6072 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6073 {
6074 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6075 enum machine_mode simd_mode;
6076 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6077 int nunits;
6078 tree vectype;
6079
6080 if (nbytes == 0)
6081 return NULL_TREE;
6082
6083 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6084 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6085 return NULL_TREE;
6086
6087 /* For vector types of elements whose mode precision doesn't
6088 match their type's precision we use an element type of mode
6089 precision. The vectorization routines will have to make sure
6090 they support the proper result truncation/extension.
6091 We also make sure to build vector types with INTEGER_TYPE
6092 component type only. */
6093 if (INTEGRAL_TYPE_P (scalar_type)
6094 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6095 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6096 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6097 TYPE_UNSIGNED (scalar_type));
6098
6099 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6100 When the component mode passes the above test simply use a type
6101 corresponding to that mode. The theory is that any use that
6102 would cause problems with this will disable vectorization anyway. */
6103 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6104 && !INTEGRAL_TYPE_P (scalar_type))
6105 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6106
6107 /* We can't build a vector type of elements with alignment bigger than
6108 their size. */
6109 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6110 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6111 TYPE_UNSIGNED (scalar_type));
6112
6113 /* If we fell back to using the mode, fail if there was
6114 no scalar type for it. */
6115 if (scalar_type == NULL_TREE)
6116 return NULL_TREE;
6117
6118 /* If no size was supplied use the mode the target prefers. Otherwise
6119 lookup a vector mode of the specified size. */
6120 if (size == 0)
6121 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6122 else
6123 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6124 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6125 if (nunits <= 1)
6126 return NULL_TREE;
6127
6128 vectype = build_vector_type (scalar_type, nunits);
6129 if (dump_enabled_p ())
6130 {
6131 dump_printf_loc (MSG_NOTE, vect_location,
6132 "get vectype with %d units of type ", nunits);
6133 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6134 }
6135
6136 if (!vectype)
6137 return NULL_TREE;
6138
6139 if (dump_enabled_p ())
6140 {
6141 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6142 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6143 }
6144
6145 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6146 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6147 {
6148 if (dump_enabled_p ())
6149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6150 "mode not supported by target.");
6151 return NULL_TREE;
6152 }
6153
6154 return vectype;
6155}
6156
6157 unsigned int current_vector_size;
6158
6159/* Function get_vectype_for_scalar_type.
6160
6161 Returns the vector type corresponding to SCALAR_TYPE as supported
6162 by the target.
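   The first successful query also latches CURRENT_VECTOR_SIZE (see below):
   e.g. (illustrative) once a 16-byte vector type has been chosen, later
   calls answer with 16-byte vector types as well.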
*/ 6163 6164tree 6165get_vectype_for_scalar_type (tree scalar_type) 6166{ 6167 tree vectype; 6168 vectype = get_vectype_for_scalar_type_and_size (scalar_type, 6169 current_vector_size); 6170 if (vectype 6171 && current_vector_size == 0) 6172 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); 6173 return vectype; 6174} 6175 6176/* Function get_same_sized_vectype 6177 6178 Returns a vector type corresponding to SCALAR_TYPE of size 6179 VECTOR_TYPE if supported by the target. */ 6180 6181tree 6182get_same_sized_vectype (tree scalar_type, tree vector_type) 6183{ 6184 return get_vectype_for_scalar_type_and_size 6185 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type))); 6186} 6187 6188/* Function vect_is_simple_use. 6189 6190 Input: 6191 LOOP_VINFO - the vect info of the loop that is being vectorized. 6192 BB_VINFO - the vect info of the basic block that is being vectorized. 6193 OPERAND - operand of STMT in the loop or bb. 6194 DEF - the defining stmt in case OPERAND is an SSA_NAME. 6195 6196 Returns whether a stmt with OPERAND can be vectorized. 6197 For loops, supportable operands are constants, loop invariants, and operands 6198 that are defined by the current iteration of the loop. Unsupportable 6199 operands are those that are defined by a previous iteration of the loop (as 6200 is the case in reduction/induction computations). 6201 For basic blocks, supportable operands are constants and bb invariants. 6202 For now, operands defined outside the basic block are not supported. */ 6203 6204bool 6205vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo, 6206 bb_vec_info bb_vinfo, gimple *def_stmt, 6207 tree *def, enum vect_def_type *dt) 6208{ 6209 basic_block bb; 6210 stmt_vec_info stmt_vinfo; 6211 struct loop *loop = NULL; 6212 6213 if (loop_vinfo) 6214 loop = LOOP_VINFO_LOOP (loop_vinfo); 6215 6216 *def_stmt = NULL; 6217 *def = NULL_TREE; 6218 6219 if (dump_enabled_p ()) 6220 { 6221 dump_printf_loc (MSG_NOTE, vect_location, 6222 "vect_is_simple_use: operand "); 6223 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand); 6224 } 6225 6226 if (CONSTANT_CLASS_P (operand)) 6227 { 6228 *dt = vect_constant_def; 6229 return true; 6230 } 6231 6232 if (is_gimple_min_invariant (operand)) 6233 { 6234 *def = operand; 6235 *dt = vect_external_def; 6236 return true; 6237 } 6238 6239 if (TREE_CODE (operand) == PAREN_EXPR) 6240 { 6241 if (dump_enabled_p ()) 6242 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy."); 6243 operand = TREE_OPERAND (operand, 0); 6244 } 6245 6246 if (TREE_CODE (operand) != SSA_NAME) 6247 { 6248 if (dump_enabled_p ()) 6249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6250 "not ssa-name."); 6251 return false; 6252 } 6253 6254 *def_stmt = SSA_NAME_DEF_STMT (operand); 6255 if (*def_stmt == NULL) 6256 { 6257 if (dump_enabled_p ()) 6258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6259 "no def_stmt."); 6260 return false; 6261 } 6262 6263 if (dump_enabled_p ()) 6264 { 6265 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: "); 6266 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0); 6267 } 6268 6269 /* Empty stmt is expected only in case of a function argument. 6270 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). 
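   For example (illustrative), for a function parameter n_1(D) used inside
   the loop, *DEF_STMT is a GIMPLE_NOP and the operand is classified as
   vect_external_def below.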
*/
6271 if (gimple_nop_p (*def_stmt))
6272 {
6273 *def = operand;
6274 *dt = vect_external_def;
6275 return true;
6276 }
6277
6278 bb = gimple_bb (*def_stmt);
6279
6280 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6281 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6282 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6283 *dt = vect_external_def;
6284 else
6285 {
6286 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6287 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6288 }
6289
6290 if (*dt == vect_unknown_def_type
6291 || (stmt
6292 && *dt == vect_double_reduction_def
6293 && gimple_code (stmt) != GIMPLE_PHI))
6294 {
6295 if (dump_enabled_p ())
6296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6297 "Unsupported pattern.");
6298 return false;
6299 }
6300
6301 if (dump_enabled_p ())
6302 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6303
6304 switch (gimple_code (*def_stmt))
6305 {
6306 case GIMPLE_PHI:
6307 *def = gimple_phi_result (*def_stmt);
6308 break;
6309
6310 case GIMPLE_ASSIGN:
6311 *def = gimple_assign_lhs (*def_stmt);
6312 break;
6313
6314 case GIMPLE_CALL:
6315 *def = gimple_call_lhs (*def_stmt);
6316 if (*def != NULL)
6317 break;
6318 /* FALLTHRU */
6319 default:
6320 if (dump_enabled_p ())
6321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6322 "unsupported defining stmt: ");
6323 return false;
6324 }
6325
6326 return true;
6327}
6328
6329/* Function vect_is_simple_use_1.
6330
6331 Same as vect_is_simple_use but also determines the vector operand
6332 type of OPERAND and stores it to *VECTYPE. If the definition of
6333 OPERAND is vect_uninitialized_def, vect_constant_def or
6334 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6335 is responsible for computing the best suited vector type for the
6336 scalar operand. */
6337
6338 bool
6339 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6340 bb_vec_info bb_vinfo, gimple *def_stmt,
6341 tree *def, enum vect_def_type *dt, tree *vectype)
6342 {
6343 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6344 def, dt))
6345 return false;
6346
6347 /* Now get a vector type if the def is internal, otherwise supply
6348 NULL_TREE and leave it up to the caller to figure out a proper
6349 type for the use stmt. */
6350 if (*dt == vect_internal_def
6351 || *dt == vect_induction_def
6352 || *dt == vect_reduction_def
6353 || *dt == vect_double_reduction_def
6354 || *dt == vect_nested_cycle)
6355 {
6356 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6357
6358 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6359 && !STMT_VINFO_RELEVANT (stmt_info)
6360 && !STMT_VINFO_LIVE_P (stmt_info))
6361 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6362
6363 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6364 gcc_assert (*vectype != NULL_TREE);
6365 }
6366 else if (*dt == vect_uninitialized_def
6367 || *dt == vect_constant_def
6368 || *dt == vect_external_def)
6369 *vectype = NULL_TREE;
6370 else
6371 gcc_unreachable ();
6372
6373 return true;
6374}
6375
6376
6377/* Function supportable_widening_operation
6378
6379 Check whether an operation represented by the code CODE is a
6380 widening operation that is supported by the target platform in
6381 vector form (i.e., when operating on arguments of type VECTYPE_IN
6382 producing a result of type VECTYPE_OUT).
6383
6384 Widening operations we currently support are NOP (CONVERT), FLOAT
6385 and WIDEN_MULT.
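   (For example, with short a, b, the computation int c = (int) a * (int) b
   is a WIDEN_MULT case; illustrative.)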
This function checks if these operations are supported
6386 by the target platform either directly (via vector tree-codes), or via
6387 target builtins.
6388
6389 Output:
6390 - CODE1 and CODE2 are codes of vector operations to be used when
6391 vectorizing the operation, if available.
6392 - MULTI_STEP_CVT determines the number of required intermediate steps in
6393 case of multi-step conversion (like char->short->int - in that case
6394 MULTI_STEP_CVT will be 1).
6395 - INTERM_TYPES contains the intermediate type required to perform the
6396 widening operation (short in the above example). */
6397
6398 bool
6399 supportable_widening_operation (enum tree_code code, gimple stmt,
6400 tree vectype_out, tree vectype_in,
6401 enum tree_code *code1, enum tree_code *code2,
6402 int *multi_step_cvt,
6403 vec<tree> *interm_types)
6404 {
6405 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6406 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6407 struct loop *vect_loop = NULL;
6408 enum machine_mode vec_mode;
6409 enum insn_code icode1, icode2;
6410 optab optab1, optab2;
6411 tree vectype = vectype_in;
6412 tree wide_vectype = vectype_out;
6413 enum tree_code c1, c2;
6414 int i;
6415 tree prev_type, intermediate_type;
6416 enum machine_mode intermediate_mode, prev_mode;
6417 optab optab3, optab4;
6418
6419 *multi_step_cvt = 0;
6420 if (loop_info)
6421 vect_loop = LOOP_VINFO_LOOP (loop_info);
6422
6423 switch (code)
6424 {
6425 case WIDEN_MULT_EXPR:
6426 /* The result of a vectorized widening operation usually requires
6427 two vectors (because the widened results do not fit into one vector).
6428 The generated vector results would normally be expected to be
6429 generated in the same order as in the original scalar computation,
6430 i.e. if 8 results are generated in each vector iteration, they are
6431 to be organized as follows:
6432 vect1: [res1,res2,res3,res4],
6433 vect2: [res5,res6,res7,res8].
6434
6435 However, in the special case that the result of the widening
6436 operation is used in a reduction computation only, the order doesn't
6437 matter (because when vectorizing a reduction we change the order of
6438 the computation). Some targets can take advantage of this and
6439 generate more efficient code. For example, targets like Altivec,
6440 that support widen_mult using a sequence of {mult_even,mult_odd}
6441 generate the following vectors:
6442 vect1: [res1,res3,res5,res7],
6443 vect2: [res2,res4,res6,res8].
6444
6445 When vectorizing outer-loops, we execute the inner-loop sequentially
6446 (each vectorized inner-loop iteration contributes to VF outer-loop
6447 iterations in parallel). We therefore don't allow changing the
6448 order of the computation in the inner-loop during outer-loop
6449 vectorization. */
6450 /* TODO: Another case in which order doesn't *really* matter is when we
6451 widen and then contract again, e.g. (short)((int)x * y >> 8).
6452 Normally, pack_trunc performs an even/odd permute, whereas the
6453 repack from an even/odd expansion would be an interleave, which
6454 would be significantly simpler for e.g. AVX2. */
6455 /* In any case, in order to avoid duplicating the code below, recurse
6456 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6457 are properly set up for the caller. If we fail, we'll continue with
6458 a VEC_WIDEN_MULT_LO/HI_EXPR check.
*/
6459 if (vect_loop
6460 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6461 && !nested_in_vect_loop_p (vect_loop, stmt)
6462 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6463 stmt, vectype_out, vectype_in,
6464 code1, code2, multi_step_cvt,
6465 interm_types))
6466 {
6467 /* Elements in a vector with vect_used_by_reduction property cannot
6468 be reordered if the use chain with this property does not have the
6469 same operation. One such example is s += a * b, where elements
6470 in a and b cannot be reordered. Here we check if the vector defined
6471 by STMT is only directly used in the reduction statement. */
6472 tree lhs = gimple_assign_lhs (stmt);
6473 use_operand_p dummy;
6474 gimple use_stmt;
6475 stmt_vec_info use_stmt_info = NULL;
6476 if (single_imm_use (lhs, &dummy, &use_stmt)
6477 && (use_stmt_info = vinfo_for_stmt (use_stmt))
6478 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
6479 return true;
6480 }
6481 c1 = VEC_WIDEN_MULT_LO_EXPR;
6482 c2 = VEC_WIDEN_MULT_HI_EXPR;
6483 break;
6484
6485 case VEC_WIDEN_MULT_EVEN_EXPR:
6486 /* Support the recursion induced just above. */
6487 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6488 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6489 break;
6490
6491 case WIDEN_LSHIFT_EXPR:
6492 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6493 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6494 break;
6495
6496 CASE_CONVERT:
6497 c1 = VEC_UNPACK_LO_EXPR;
6498 c2 = VEC_UNPACK_HI_EXPR;
6499 break;
6500
6501 case FLOAT_EXPR:
6502 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6503 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6504 break;
6505
6506 case FIX_TRUNC_EXPR:
6507 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6508 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6509 computing the operation. */
6510 return false;
6511
6512 default:
6513 gcc_unreachable ();
6514 }
6515
6516 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6517 {
6518 enum tree_code ctmp = c1;
6519 c1 = c2;
6520 c2 = ctmp;
6521 }
6522
6523 if (code == FIX_TRUNC_EXPR)
6524 {
6525 /* The signedness is determined from output operand. */
6526 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6527 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6528 }
6529 else
6530 {
6531 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6532 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6533 }
6534
6535 if (!optab1 || !optab2)
6536 return false;
6537
6538 vec_mode = TYPE_MODE (vectype);
6539 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6540 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6541 return false;
6542
6543 *code1 = c1;
6544 *code2 = c2;
6545
6546 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6547 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6548 return true;
6549
6550 /* Check if it's a multi-step conversion that can be done using intermediate
6551 types. */
6552
6553 prev_type = vectype;
6554 prev_mode = vec_mode;
6555
6556 if (!CONVERT_EXPR_CODE_P (code))
6557 return false;
6558
6559 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6560 intermediate steps in the promotion sequence. We try
6561 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6562 not.
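   For example (illustrative), widening chars to ints with V16QI inputs and
   V4SI results takes one intermediate step through V8HI, so MULTI_STEP_CVT
   becomes 1 and INTERM_TYPES holds the short vector type.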
*/ 6563 interm_types->create (MAX_INTERM_CVT_STEPS); 6564 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) 6565 { 6566 intermediate_mode = insn_data[icode1].operand[0].mode; 6567 intermediate_type 6568 = lang_hooks.types.type_for_mode (intermediate_mode, 6569 TYPE_UNSIGNED (prev_type)); 6570 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); 6571 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); 6572 6573 if (!optab3 || !optab4 6574 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing 6575 || insn_data[icode1].operand[0].mode != intermediate_mode 6576 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing 6577 || insn_data[icode2].operand[0].mode != intermediate_mode 6578 || ((icode1 = optab_handler (optab3, intermediate_mode)) 6579 == CODE_FOR_nothing) 6580 || ((icode2 = optab_handler (optab4, intermediate_mode)) 6581 == CODE_FOR_nothing)) 6582 break; 6583 6584 interm_types->quick_push (intermediate_type); 6585 (*multi_step_cvt)++; 6586 6587 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) 6588 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) 6589 return true; 6590 6591 prev_type = intermediate_type; 6592 prev_mode = intermediate_mode; 6593 } 6594 6595 interm_types->release (); 6596 return false; 6597} 6598 6599 6600/* Function supportable_narrowing_operation 6601 6602 Check whether an operation represented by the code CODE is a 6603 narrowing operation that is supported by the target platform in 6604 vector form (i.e., when operating on arguments of type VECTYPE_IN 6605 and producing a result of type VECTYPE_OUT). 6606 6607 Narrowing operations we currently support are NOP (CONVERT) and 6608 FIX_TRUNC. This function checks if these operations are supported by 6609 the target platform directly via vector tree-codes. 6610 6611 Output: 6612 - CODE1 is the code of a vector operation to be used when 6613 vectorizing the operation, if available. 6614 - MULTI_STEP_CVT determines the number of required intermediate steps in 6615 case of multi-step conversion (like int->short->char - in that case 6616 MULTI_STEP_CVT will be 1). 6617 - INTERM_TYPES contains the intermediate type required to perform the 6618 narrowing operation (short in the above example). */ 6619 6620bool 6621supportable_narrowing_operation (enum tree_code code, 6622 tree vectype_out, tree vectype_in, 6623 enum tree_code *code1, int *multi_step_cvt, 6624 vec<tree> *interm_types) 6625{ 6626 enum machine_mode vec_mode; 6627 enum insn_code icode1; 6628 optab optab1, interm_optab; 6629 tree vectype = vectype_in; 6630 tree narrow_vectype = vectype_out; 6631 enum tree_code c1; 6632 tree intermediate_type; 6633 enum machine_mode intermediate_mode, prev_mode; 6634 int i; 6635 bool uns; 6636 6637 *multi_step_cvt = 0; 6638 switch (code) 6639 { 6640 CASE_CONVERT: 6641 c1 = VEC_PACK_TRUNC_EXPR; 6642 break; 6643 6644 case FIX_TRUNC_EXPR: 6645 c1 = VEC_PACK_FIX_TRUNC_EXPR; 6646 break; 6647 6648 case FLOAT_EXPR: 6649 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR 6650 tree code and optabs used for computing the operation. */ 6651 return false; 6652 6653 default: 6654 gcc_unreachable (); 6655 } 6656 6657 if (code == FIX_TRUNC_EXPR) 6658 /* The signedness is determined from output operand. 
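   E.g. (illustrative), (unsigned short) f and (short) f for float f need
   differently signed pack operations even though the input vector type is
   the same, which is why the optab is looked up via VECTYPE_OUT here.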
*/
6659 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6660 else
6661 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6662
6663 if (!optab1)
6664 return false;
6665
6666 vec_mode = TYPE_MODE (vectype);
6667 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6668 return false;
6669
6670 *code1 = c1;
6671
6672 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6673 return true;
6674
6675 /* Check if it's a multi-step conversion that can be done using intermediate
6676 types. */
6677 prev_mode = vec_mode;
6678 if (code == FIX_TRUNC_EXPR)
6679 uns = TYPE_UNSIGNED (vectype_out);
6680 else
6681 uns = TYPE_UNSIGNED (vectype);
6682
6683 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6684 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6685 costly than signed. */
6686 if (code == FIX_TRUNC_EXPR && uns)
6687 {
6688 enum insn_code icode2;
6689
6690 intermediate_type
6691 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6692 interm_optab
6693 = optab_for_tree_code (c1, intermediate_type, optab_default);
6694 if (interm_optab != unknown_optab
6695 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6696 && insn_data[icode1].operand[0].mode
6697 == insn_data[icode2].operand[0].mode)
6698 {
6699 uns = false;
6700 optab1 = interm_optab;
6701 icode1 = icode2;
6702 }
6703 }
6704
6705 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6706 intermediate steps in the narrowing sequence. We try
6707 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6708 interm_types->create (MAX_INTERM_CVT_STEPS);
6709 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6710 {
6711 intermediate_mode = insn_data[icode1].operand[0].mode;
6712 intermediate_type
6713 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6714 interm_optab
6715 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6716 optab_default);
6717 if (!interm_optab
6718 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6719 || insn_data[icode1].operand[0].mode != intermediate_mode
6720 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6721 == CODE_FOR_nothing))
6722 break;
6723
6724 interm_types->quick_push (intermediate_type);
6725 (*multi_step_cvt)++;
6726
6727 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6728 return true;
6729
6730 prev_mode = intermediate_mode;
6731 optab1 = interm_optab;
6732 }
6733
6734 interm_types->release ();
6735 return false;
6736}
6737