/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "toplev.h"

/* Function prototypes */
static void vect_pattern_recog_1
  (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
static bool widened_name_p (tree, gimple, tree *, gimple *);

/* Pattern recognition functions */
static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
static gimple vect_recog_pow_pattern (gimple, tree *, tree *);

/* Table of all pattern recognizers, tried in this order on every stmt
   by vect_pattern_recog.  Must contain exactly NUM_PATTERNS entries.  */
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern};


/* Function
   widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.

   On success, *HALF_TYPE is set to the narrow (pre-promotion) type and
   *DEF_STMT to the promoting assignment.  Returns false when NAME is not
   the result of such a promotion.
*/

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  /* NAME itself must be an operand the vectorizer understands; this also
     fills in *DEF_STMT.  */
  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  /* The defining stmt must be a conversion (NOP_EXPR).  */
  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  /* Require an integral conversion that preserves signedness and at least
     doubles the precision -- only then is this a widening we can use.  */
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  /* The narrow source operand must itself be usable by the vectorizer.  */
  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.
*/ 114 115static tree 116vect_recog_temp_ssa_var (tree type, gimple stmt) 117{ 118 tree var = create_tmp_var (type, "patt"); 119 120 add_referenced_var (var); 121 var = make_ssa_name (var, stmt); 122 return var; 123} 124 125/* Function vect_recog_dot_prod_pattern 126 127 Try to find the following pattern: 128 129 type x_t, y_t; 130 TYPE1 prod; 131 TYPE2 sum = init; 132 loop: 133 sum_0 = phi <init, sum_1> 134 S1 x_t = ... 135 S2 y_t = ... 136 S3 x_T = (TYPE1) x_t; 137 S4 y_T = (TYPE1) y_t; 138 S5 prod = x_T * y_T; 139 [S6 prod = (TYPE2) prod; #optional] 140 S7 sum_1 = prod + sum_0; 141 142 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the 143 same size of 'TYPE1' or bigger. This is a special case of a reduction 144 computation. 145 146 Input: 147 148 * LAST_STMT: A stmt from which the pattern search begins. In the example, 149 when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be 150 detected. 151 152 Output: 153 154 * TYPE_IN: The type of the input arguments to the pattern. 155 156 * TYPE_OUT: The type of the output of this pattern. 157 158 * Return value: A new stmt that will be used to replace the sequence of 159 stmts that constitute the pattern. In this case it will be: 160 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> 161 162 Note: The dot-prod idiom is a widening reduction pattern that is 163 vectorized without preserving all the intermediate results. It 164 produces only N/2 (widened) results (by summing up pairs of 165 intermediate results) rather than all N results. Therefore, we 166 cannot allow this pattern when we want to get all the results and in 167 the correct order (as is the case when this computation is in an 168 inner-loop nested in an outer-loop that us being vectorized). 
*/ 169 170static gimple 171vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) 172{ 173 gimple stmt; 174 tree oprnd0, oprnd1; 175 tree oprnd00, oprnd01; 176 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 177 tree type, half_type; 178 gimple pattern_stmt; 179 tree prod_type; 180 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 181 struct loop *loop = LOOP_VINFO_LOOP (loop_info); 182 tree var, rhs; 183 184 if (!is_gimple_assign (last_stmt)) 185 return NULL; 186 187 type = gimple_expr_type (last_stmt); 188 189 /* Look for the following pattern 190 DX = (TYPE1) X; 191 DY = (TYPE1) Y; 192 DPROD = DX * DY; 193 DDPROD = (TYPE2) DPROD; 194 sum_1 = DDPROD + sum_0; 195 In which 196 - DX is double the size of X 197 - DY is double the size of Y 198 - DX, DY, DPROD all have the same type 199 - sum is the same size of DPROD or bigger 200 - sum has been recognized as a reduction variable. 201 202 This is equivalent to: 203 DPROD = X w* Y; #widen mult 204 sum_1 = DPROD w+ sum_0; #widen summation 205 or 206 DPROD = X w* Y; #widen mult 207 sum_1 = DPROD + sum_0; #summation 208 */ 209 210 /* Starting from LAST_STMT, follow the defs of its uses in search 211 of the above pattern. */ 212 213 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 214 return NULL; 215 216 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 217 { 218 /* Has been detected as widening-summation? 
*/ 219 220 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 221 type = gimple_expr_type (stmt); 222 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 223 return NULL; 224 oprnd0 = gimple_assign_rhs1 (stmt); 225 oprnd1 = gimple_assign_rhs2 (stmt); 226 half_type = TREE_TYPE (oprnd0); 227 } 228 else 229 { 230 gimple def_stmt; 231 232 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 233 return NULL; 234 oprnd0 = gimple_assign_rhs1 (last_stmt); 235 oprnd1 = gimple_assign_rhs2 (last_stmt); 236 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 237 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 238 return NULL; 239 stmt = last_stmt; 240 241 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) 242 { 243 stmt = def_stmt; 244 oprnd0 = gimple_assign_rhs1 (stmt); 245 } 246 else 247 half_type = type; 248 } 249 250 /* So far so good. Since last_stmt was detected as a (summation) reduction, 251 we know that oprnd1 is the reduction variable (defined by a loop-header 252 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 253 Left to check that oprnd0 is defined by a (widen_)mult_expr */ 254 255 prod_type = half_type; 256 stmt = SSA_NAME_DEF_STMT (oprnd0); 257 258 /* It could not be the dot_prod pattern if the stmt is outside the loop. */ 259 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) 260 return NULL; 261 262 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 263 inside the loop (in case we are analyzing an outer-loop). */ 264 if (!is_gimple_assign (stmt)) 265 return NULL; 266 stmt_vinfo = vinfo_for_stmt (stmt); 267 gcc_assert (stmt_vinfo); 268 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 269 return NULL; 270 if (gimple_assign_rhs_code (stmt) != MULT_EXPR) 271 return NULL; 272 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 273 { 274 /* Has been detected as a widening multiplication? 
*/ 275 276 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 277 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR) 278 return NULL; 279 stmt_vinfo = vinfo_for_stmt (stmt); 280 gcc_assert (stmt_vinfo); 281 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def); 282 oprnd00 = gimple_assign_rhs1 (stmt); 283 oprnd01 = gimple_assign_rhs2 (stmt); 284 } 285 else 286 { 287 tree half_type0, half_type1; 288 gimple def_stmt; 289 tree oprnd0, oprnd1; 290 291 oprnd0 = gimple_assign_rhs1 (stmt); 292 oprnd1 = gimple_assign_rhs2 (stmt); 293 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) 294 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) 295 return NULL; 296 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) 297 return NULL; 298 oprnd00 = gimple_assign_rhs1 (def_stmt); 299 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) 300 return NULL; 301 oprnd01 = gimple_assign_rhs1 (def_stmt); 302 if (!types_compatible_p (half_type0, half_type1)) 303 return NULL; 304 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) 305 return NULL; 306 } 307 308 half_type = TREE_TYPE (oprnd00); 309 *type_in = half_type; 310 *type_out = type; 311 312 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 313 var = vect_recog_temp_ssa_var (type, NULL); 314 rhs = build3 (DOT_PROD_EXPR, type, oprnd00, oprnd01, oprnd1), 315 pattern_stmt = gimple_build_assign (var, rhs); 316 317 if (vect_print_dump_info (REPORT_DETAILS)) 318 { 319 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: "); 320 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 321 } 322 323 /* We don't allow changing the order of the computation in the inner-loop 324 when doing outer-loop vectorization. 
*/ 325 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 326 327 return pattern_stmt; 328} 329 330/* Function vect_recog_widen_mult_pattern 331 332 Try to find the following pattern: 333 334 type a_t, b_t; 335 TYPE a_T, b_T, prod_T; 336 337 S1 a_t = ; 338 S2 b_t = ; 339 S3 a_T = (TYPE) a_t; 340 S4 b_T = (TYPE) b_t; 341 S5 prod_T = a_T * b_T; 342 343 where type 'TYPE' is at least double the size of type 'type'. 344 345 Input: 346 347 * LAST_STMT: A stmt from which the pattern search begins. In the example, 348 when this function is called with S5, the pattern {S3,S4,S5} is be detected. 349 350 Output: 351 352 * TYPE_IN: The type of the input arguments to the pattern. 353 354 * TYPE_OUT: The type of the output of this pattern. 355 356 * Return value: A new stmt that will be used to replace the sequence of 357 stmts that constitute the pattern. In this case it will be: 358 WIDEN_MULT <a_t, b_t> 359*/ 360 361static gimple 362vect_recog_widen_mult_pattern (gimple last_stmt, 363 tree *type_in, 364 tree *type_out) 365{ 366 gimple def_stmt0, def_stmt1; 367 tree oprnd0, oprnd1; 368 tree type, half_type0, half_type1; 369 gimple pattern_stmt; 370 tree vectype; 371 tree dummy; 372 tree var; 373 enum tree_code dummy_code; 374 int dummy_int; 375 VEC (tree, heap) *dummy_vec; 376 377 if (!is_gimple_assign (last_stmt)) 378 return NULL; 379 380 type = gimple_expr_type (last_stmt); 381 382 /* Starting from LAST_STMT, follow the defs of its uses in search 383 of the above pattern. 
*/ 384 385 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 386 return NULL; 387 388 oprnd0 = gimple_assign_rhs1 (last_stmt); 389 oprnd1 = gimple_assign_rhs2 (last_stmt); 390 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 391 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 392 return NULL; 393 394 /* Check argument 0 */ 395 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) 396 return NULL; 397 oprnd0 = gimple_assign_rhs1 (def_stmt0); 398 399 /* Check argument 1 */ 400 if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) 401 return NULL; 402 oprnd1 = gimple_assign_rhs1 (def_stmt1); 403 404 if (!types_compatible_p (half_type0, half_type1)) 405 return NULL; 406 407 /* Pattern detected. */ 408 if (vect_print_dump_info (REPORT_DETAILS)) 409 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: "); 410 411 /* Check target support */ 412 vectype = get_vectype_for_scalar_type (half_type0); 413 if (!vectype 414 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype, 415 &dummy, &dummy, &dummy_code, 416 &dummy_code, &dummy_int, &dummy_vec)) 417 return NULL; 418 419 *type_in = vectype; 420 *type_out = NULL_TREE; 421 422 /* Pattern supported. Create a stmt to be used to replace the pattern: */ 423 var = vect_recog_temp_ssa_var (type, NULL); 424 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0, 425 oprnd1); 426 SSA_NAME_DEF_STMT (var) = pattern_stmt; 427 428 if (vect_print_dump_info (REPORT_DETAILS)) 429 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 430 431 return pattern_stmt; 432} 433 434 435/* Function vect_recog_pow_pattern 436 437 Try to find the following pattern: 438 439 x = POW (y, N); 440 441 with POW being one of pow, powf, powi, powif and N being 442 either 2 or 0.5. 443 444 Input: 445 446 * LAST_STMT: A stmt from which the pattern search begins. 447 448 Output: 449 450 * TYPE_IN: The type of the input arguments to the pattern. 

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

static gimple
vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
{
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  /* The pattern anchor must be a call whose result is actually used.  */
  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      /* Only constant exponents are recognized.  */
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring: pow (x, 2) or powi (x, 2) becomes x * x.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      SSA_NAME_DEF_STMT (var) = stmt;
      return stmt;
    }

  /* Catch square root: pow (x, 0.5) becomes sqrt (x), provided the target
     has a vectorized sqrt for this type.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'. This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins. In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results. It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).
*/

static gimple
vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
{
  gimple stmt;
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good. Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);
  SSA_NAME_DEF_STMT (var) = pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type. If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.
*/

static void
vect_pattern_recog_1 (
	gimple (* vect_recog_func) (gimple, tree *, tree *),
	gimple_stmt_iterator si)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info pattern_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;

  pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      pattern_vectype = get_vectype_for_scalar_type (type_in);
      if (!pattern_vectype)
        return;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      /* Reject the pattern if there is no optab for it, the insn is not
         implemented for the vector mode, or TYPE_OUT (when given) does not
         match the insn's output mode.  */
      optab = optab_for_tree_code (code, pattern_vectype, optab_default);
      vec_mode = TYPE_MODE (pattern_vectype);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)->insn_code) ==
              CODE_FOR_nothing
          || (type_out
              && (!get_vectype_for_scalar_type (type_out)
                  || (insn_data[icode].operand[0].mode !=
                      TYPE_MODE (get_vectype_for_scalar_type (type_out))))))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (pattern_stmt,
		      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);

  /* Cross-link the original stmt and the pattern stmt; the original is
     flagged as being inside a pattern so later phases skip it.  */
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
  STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;

  return;
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we insert a new stmt
	that provides the same functionality and that can be vectorized. We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt. We then:
   - create a new stmt S6 that will replace the pattern.
   - insert the new stmt S6 before the last stmt in the pattern
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > S6: a_new = ....               -       S4              -
         S4: a_0 = ..use(a_1)..         true    S6              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).
781 782 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead 783 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will 784 remain irrelevant unless used by stmts other than S4. 785 786 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} 787 (because they are marked as irrelevant). It will vectorize S6, and record 788 a pointer to the new vector stmt VS6 both from S6 (as usual), and also 789 from S4. We do that so that when we get to vectorizing stmts that use the 790 def of S4 (like S5 that uses a_0), we'll know where to take the relevant 791 vector-def from. S4 will be skipped, and S5 will be vectorized as usual: 792 793 in_pattern_p related_stmt vec_stmt 794 S1: a_i = .... - - - 795 S2: a_2 = ..use(a_i).. - - - 796 S3: a_1 = ..use(a_2).. - - - 797 > VS6: va_new = .... - - - 798 S6: a_new = .... - S4 VS6 799 S4: a_0 = ..use(a_1).. true S6 VS6 800 > VS5: ... = ..vuse(va_new).. - - - 801 S5: ... = ..use(a_0).. - - - 802 803 DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used 804 elsewhere), and we'll end up with: 805 806 VS6: va_new = .... 807 VS5: ... = ..vuse(va_new).. 808 809 If vectorization does not succeed, DCE will clean S6 away (its def is 810 not used), and we'll end up with the original sequence. 
811*/ 812 813void 814vect_pattern_recog (loop_vec_info loop_vinfo) 815{ 816 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 817 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 818 unsigned int nbbs = loop->num_nodes; 819 gimple_stmt_iterator si; 820 unsigned int i, j; 821 gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); 822 823 if (vect_print_dump_info (REPORT_DETAILS)) 824 fprintf (vect_dump, "=== vect_pattern_recog ==="); 825 826 /* Scan through the loop stmts, applying the pattern recognition 827 functions starting at each stmt visited: */ 828 for (i = 0; i < nbbs; i++) 829 { 830 basic_block bb = bbs[i]; 831 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 832 { 833 /* Scan over all generic vect_recog_xxx_pattern functions. */ 834 for (j = 0; j < NUM_PATTERNS; j++) 835 { 836 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; 837 vect_pattern_recog_1 (vect_recog_func_ptr, si); 838 } 839 } 840 } 841} 842