1/* Analysis Utilities for Loop Vectorization. 2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc. 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 2002110-1301, USA. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "tm.h" 26#include "ggc.h" 27#include "tree.h"
| 1/* Analysis Utilities for Loop Vectorization. 2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc. 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 2002110-1301, USA. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "tm.h" 26#include "ggc.h" 27#include "tree.h"
|
| 28#include "target.h"
|
28#include "basic-block.h" 29#include "diagnostic.h" 30#include "tree-flow.h" 31#include "tree-dump.h" 32#include "timevar.h" 33#include "cfgloop.h" 34#include "expr.h" 35#include "optabs.h" 36#include "params.h" 37#include "tree-chrec.h" 38#include "tree-data-ref.h" 39#include "tree-scalar-evolution.h" 40#include "tree-vectorizer.h" 41 42/* Main analysis functions. */ 43static loop_vec_info vect_analyze_loop_form (struct loop *); 44static bool vect_analyze_data_refs (loop_vec_info); 45static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); 46static void vect_analyze_scalar_cycles (loop_vec_info); 47static bool vect_analyze_data_ref_accesses (loop_vec_info); 48static bool vect_analyze_data_ref_dependences (loop_vec_info); 49static bool vect_analyze_data_refs_alignment (loop_vec_info); 50static bool vect_compute_data_refs_alignment (loop_vec_info); 51static bool vect_enhance_data_refs_alignment (loop_vec_info); 52static bool vect_analyze_operations (loop_vec_info); 53static bool vect_determine_vectorization_factor (loop_vec_info); 54 55/* Utility functions for the analyses. */ 56static bool exist_non_indexing_operands_for_use_p (tree, tree); 57static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool); 58static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *); 59static tree vect_get_loop_niters (struct loop *, tree *); 60static bool vect_analyze_data_ref_dependence 61 (struct data_dependence_relation *, loop_vec_info); 62static bool vect_compute_data_ref_alignment (struct data_reference *); 63static bool vect_analyze_data_ref_access (struct data_reference *); 64static bool vect_can_advance_ivs_p (loop_vec_info); 65static void vect_update_misalignment_for_peel 66 (struct data_reference *, struct data_reference *, int npeel); 67 68 69/* Function vect_determine_vectorization_factor 70 71 Determine the vectorization factor (VF). VF is the number of data elements 72 that are operated upon in parallel in a single iteration of the vectorized 73 loop. For example, when vectorizing a loop that operates on 4byte elements, 74 on a target with vector size (VS) 16byte, the VF is set to 4, since 4 75 elements can fit in a single vector register. 76 77 We currently support vectorization of loops in which all types operated upon 78 are of the same size. Therefore this function currently sets VF according to 79 the size of the types operated upon, and fails if there are multiple sizes 80 in the loop. 81 82 VF is also the factor by which the loop iterations are strip-mined, e.g.: 83 original loop: 84 for (i=0; i<N; i++){ 85 a[i] = b[i] + c[i]; 86 } 87 88 vectorized loop: 89 for (i=0; i<N; i+=VF){ 90 a[i:VF] = b[i:VF] + c[i:VF]; 91 } 92*/ 93 94static bool 95vect_determine_vectorization_factor (loop_vec_info loop_vinfo) 96{ 97 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 98 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 99 int nbbs = loop->num_nodes; 100 block_stmt_iterator si; 101 unsigned int vectorization_factor = 0; 102 int i; 103 tree scalar_type; 104 105 if (vect_print_dump_info (REPORT_DETAILS)) 106 fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); 107 108 for (i = 0; i < nbbs; i++) 109 { 110 basic_block bb = bbs[i]; 111 112 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 113 { 114 tree stmt = bsi_stmt (si); 115 unsigned int nunits; 116 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 117 tree vectype; 118 119 if (vect_print_dump_info (REPORT_DETAILS)) 120 { 121 fprintf (vect_dump, "==> examining statement: "); 122 print_generic_expr (vect_dump, stmt, TDF_SLIM); 123 } 124 125 gcc_assert (stmt_info); 126 /* skip stmts which do not need to be vectorized. */ 127 if (!STMT_VINFO_RELEVANT_P (stmt_info) 128 && !STMT_VINFO_LIVE_P (stmt_info)) 129 { 130 if (vect_print_dump_info (REPORT_DETAILS)) 131 fprintf (vect_dump, "skip."); 132 continue; 133 } 134 135 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) 136 { 137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 138 { 139 fprintf (vect_dump, "not vectorized: vector stmt in loop:"); 140 print_generic_expr (vect_dump, stmt, TDF_SLIM); 141 } 142 return false; 143 } 144 145 if (STMT_VINFO_VECTYPE (stmt_info)) 146 { 147 vectype = STMT_VINFO_VECTYPE (stmt_info); 148 scalar_type = TREE_TYPE (vectype); 149 } 150 else 151 { 152 if (STMT_VINFO_DATA_REF (stmt_info)) 153 scalar_type = 154 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); 155 else if (TREE_CODE (stmt) == MODIFY_EXPR) 156 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); 157 else 158 scalar_type = TREE_TYPE (stmt); 159 160 if (vect_print_dump_info (REPORT_DETAILS)) 161 { 162 fprintf (vect_dump, "get vectype for scalar type: "); 163 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 164 } 165 166 vectype = get_vectype_for_scalar_type (scalar_type); 167 if (!vectype) 168 { 169 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 170 { 171 fprintf (vect_dump, 172 "not vectorized: unsupported data-type "); 173 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 174 } 175 return false; 176 } 177 STMT_VINFO_VECTYPE (stmt_info) = vectype; 178 } 179 180 if (vect_print_dump_info (REPORT_DETAILS)) 181 { 182 fprintf (vect_dump, "vectype: "); 183 print_generic_expr (vect_dump, vectype, TDF_SLIM); 184 } 185 186 nunits = TYPE_VECTOR_SUBPARTS (vectype); 187 if (vect_print_dump_info (REPORT_DETAILS)) 188 fprintf (vect_dump, "nunits = %d", nunits); 189 190 if (vectorization_factor) 191 { 192 /* FORNOW: don't allow mixed units. 193 This restriction will be relaxed in the future. */ 194 if (nunits != vectorization_factor) 195 { 196 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 197 fprintf (vect_dump, "not vectorized: mixed data-types"); 198 return false; 199 } 200 } 201 else 202 vectorization_factor = nunits; 203 204 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) 205 * vectorization_factor == UNITS_PER_SIMD_WORD); 206 } 207 } 208 209 /* TODO: Analyze cost. Decide if worth while to vectorize. */ 210 211 if (vectorization_factor <= 1) 212 { 213 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 214 fprintf (vect_dump, "not vectorized: unsupported data-type"); 215 return false; 216 } 217 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; 218 219 return true; 220} 221 222 223/* Function vect_analyze_operations. 224 225 Scan the loop stmts and make sure they are all vectorizable. */ 226 227static bool 228vect_analyze_operations (loop_vec_info loop_vinfo) 229{ 230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 231 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 232 int nbbs = loop->num_nodes; 233 block_stmt_iterator si; 234 unsigned int vectorization_factor = 0; 235 int i; 236 bool ok; 237 tree phi; 238 stmt_vec_info stmt_info; 239 bool need_to_vectorize = false; 240 241 if (vect_print_dump_info (REPORT_DETAILS)) 242 fprintf (vect_dump, "=== vect_analyze_operations ==="); 243 244 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); 245 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 246 247 for (i = 0; i < nbbs; i++) 248 { 249 basic_block bb = bbs[i]; 250 251 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 252 { 253 stmt_info = vinfo_for_stmt (phi); 254 if (vect_print_dump_info (REPORT_DETAILS)) 255 { 256 fprintf (vect_dump, "examining phi: "); 257 print_generic_expr (vect_dump, phi, TDF_SLIM); 258 } 259 260 gcc_assert (stmt_info); 261 262 if (STMT_VINFO_LIVE_P (stmt_info)) 263 { 264 /* FORNOW: not yet supported. */ 265 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 266 fprintf (vect_dump, "not vectorized: value used after loop."); 267 return false; 268 } 269 270 if (STMT_VINFO_RELEVANT_P (stmt_info)) 271 { 272 /* Most likely a reduction-like computation that is used 273 in the loop. */ 274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 275 fprintf (vect_dump, "not vectorized: unsupported pattern."); 276 return false; 277 } 278 } 279 280 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 281 { 282 tree stmt = bsi_stmt (si); 283 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 284 285 if (vect_print_dump_info (REPORT_DETAILS)) 286 { 287 fprintf (vect_dump, "==> examining statement: "); 288 print_generic_expr (vect_dump, stmt, TDF_SLIM); 289 } 290 291 gcc_assert (stmt_info); 292 293 /* skip stmts which do not need to be vectorized. 294 this is expected to include: 295 - the COND_EXPR which is the loop exit condition 296 - any LABEL_EXPRs in the loop 297 - computations that are used only for array indexing or loop 298 control */ 299 300 if (!STMT_VINFO_RELEVANT_P (stmt_info) 301 && !STMT_VINFO_LIVE_P (stmt_info)) 302 { 303 if (vect_print_dump_info (REPORT_DETAILS)) 304 fprintf (vect_dump, "irrelevant."); 305 continue; 306 } 307 308 if (STMT_VINFO_RELEVANT_P (stmt_info)) 309 { 310 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))); 311 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); 312 313 ok = (vectorizable_operation (stmt, NULL, NULL) 314 || vectorizable_assignment (stmt, NULL, NULL) 315 || vectorizable_load (stmt, NULL, NULL) 316 || vectorizable_store (stmt, NULL, NULL) 317 || vectorizable_condition (stmt, NULL, NULL)); 318 319 if (!ok) 320 { 321 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 322 { 323 fprintf (vect_dump, 324 "not vectorized: relevant stmt not supported: "); 325 print_generic_expr (vect_dump, stmt, TDF_SLIM); 326 } 327 return false; 328 } 329 need_to_vectorize = true; 330 } 331 332 if (STMT_VINFO_LIVE_P (stmt_info)) 333 { 334 ok = vectorizable_reduction (stmt, NULL, NULL); 335 336 if (ok) 337 need_to_vectorize = true; 338 else 339 ok = vectorizable_live_operation (stmt, NULL, NULL); 340 341 if (!ok) 342 { 343 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 344 { 345 fprintf (vect_dump, 346 "not vectorized: live stmt not supported: "); 347 print_generic_expr (vect_dump, stmt, TDF_SLIM); 348 } 349 return false; 350 } 351 } 352 } /* stmts in bb */ 353 } /* bbs */ 354 355 /* TODO: Analyze cost. Decide if worth while to vectorize. */ 356 357 /* All operations in the loop are either irrelevant (deal with loop 358 control, or dead), or only used outside the loop and can be moved 359 out of the loop (e.g. invariants, inductions). The loop can be 360 optimized away by scalar optimizations. We're better off not 361 touching this loop. */ 362 if (!need_to_vectorize) 363 { 364 if (vect_print_dump_info (REPORT_DETAILS)) 365 fprintf (vect_dump, 366 "All the computation can be taken out of the loop."); 367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 368 fprintf (vect_dump, 369 "not vectorized: redundant loop. no profit to vectorize."); 370 return false; 371 } 372 373 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 374 && vect_print_dump_info (REPORT_DETAILS)) 375 fprintf (vect_dump, 376 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, 377 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo)); 378 379 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 380 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor) 381 { 382 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 383 fprintf (vect_dump, "not vectorized: iteration count too small."); 384 return false; 385 } 386 387 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 388 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0 389 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) 390 { 391 if (vect_print_dump_info (REPORT_DETAILS)) 392 fprintf (vect_dump, "epilog loop required."); 393 if (!vect_can_advance_ivs_p (loop_vinfo)) 394 { 395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 396 fprintf (vect_dump, 397 "not vectorized: can't create epilog loop 1."); 398 return false; 399 } 400 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit)) 401 { 402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 403 fprintf (vect_dump, 404 "not vectorized: can't create epilog loop 2."); 405 return false; 406 } 407 } 408 409 return true; 410} 411 412 413/* Function exist_non_indexing_operands_for_use_p 414 415 USE is one of the uses attached to STMT. Check if USE is 416 used in STMT for anything other than indexing an array. */ 417 418static bool 419exist_non_indexing_operands_for_use_p (tree use, tree stmt) 420{ 421 tree operand; 422 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 423 424 /* USE corresponds to some operand in STMT. If there is no data 425 reference in STMT, then any operand that corresponds to USE 426 is not indexing an array. */ 427 if (!STMT_VINFO_DATA_REF (stmt_info)) 428 return true; 429 430 /* STMT has a data_ref. FORNOW this means that its of one of 431 the following forms: 432 -1- ARRAY_REF = var 433 -2- var = ARRAY_REF 434 (This should have been verified in analyze_data_refs). 435 436 'var' in the second case corresponds to a def, not a use, 437 so USE cannot correspond to any operands that are not used 438 for array indexing. 439 440 Therefore, all we need to check is if STMT falls into the 441 first case, and whether var corresponds to USE. */ 442 443 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) 444 return false; 445 446 operand = TREE_OPERAND (stmt, 1); 447 448 if (TREE_CODE (operand) != SSA_NAME) 449 return false; 450 451 if (operand == use) 452 return true; 453 454 return false; 455} 456 457 458/* Function vect_analyze_scalar_cycles. 459 460 Examine the cross iteration def-use cycles of scalar variables, by 461 analyzing the loop (scalar) PHIs; Classify each cycle as one of the 462 following: invariant, induction, reduction, unknown. 463 464 Some forms of scalar cycles are not yet supported. 465 466 Example1: reduction: (unsupported yet) 467 468 loop1: 469 for (i=0; i<N; i++) 470 sum += a[i]; 471 472 Example2: induction: (unsupported yet) 473 474 loop2: 475 for (i=0; i<N; i++) 476 a[i] = i; 477 478 Note: the following loop *is* vectorizable: 479 480 loop3: 481 for (i=0; i<N; i++) 482 a[i] = b[i]; 483 484 even though it has a def-use cycle caused by the induction variable i: 485 486 loop: i_2 = PHI (i_0, i_1) 487 a[i_2] = ...; 488 i_1 = i_2 + 1; 489 GOTO loop; 490 491 because the def-use cycle in loop3 is considered "not relevant" - i.e., 492 it does not need to be vectorized because it is only used for array 493 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in 494 loop2 on the other hand is relevant (it is being written to memory). 495*/ 496 497static void 498vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) 499{ 500 tree phi; 501 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 502 basic_block bb = loop->header; 503 tree dummy; 504 505 if (vect_print_dump_info (REPORT_DETAILS)) 506 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); 507 508 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 509 { 510 tree access_fn = NULL; 511 tree def = PHI_RESULT (phi); 512 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi); 513 tree reduc_stmt; 514 515 if (vect_print_dump_info (REPORT_DETAILS)) 516 { 517 fprintf (vect_dump, "Analyze phi: "); 518 print_generic_expr (vect_dump, phi, TDF_SLIM); 519 } 520 521 /* Skip virtual phi's. The data dependences that are associated with 522 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ 523 524 if (!is_gimple_reg (SSA_NAME_VAR (def))) 525 { 526 if (vect_print_dump_info (REPORT_DETAILS)) 527 fprintf (vect_dump, "virtual phi. skip."); 528 continue; 529 } 530 531 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type; 532 533 /* Analyze the evolution function. */ 534 535 access_fn = analyze_scalar_evolution (loop, def); 536 537 if (!access_fn) 538 continue; 539 540 if (vect_print_dump_info (REPORT_DETAILS)) 541 { 542 fprintf (vect_dump, "Access function of PHI: "); 543 print_generic_expr (vect_dump, access_fn, TDF_SLIM); 544 } 545 546 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy)) 547 { 548 if (vect_print_dump_info (REPORT_DETAILS)) 549 fprintf (vect_dump, "Detected induction."); 550 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; 551 continue; 552 } 553 554 /* TODO: handle invariant phis */ 555 556 reduc_stmt = vect_is_simple_reduction (loop, phi); 557 if (reduc_stmt) 558 { 559 if (vect_print_dump_info (REPORT_DETAILS)) 560 fprintf (vect_dump, "Detected reduction."); 561 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; 562 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = 563 vect_reduction_def; 564 } 565 else 566 if (vect_print_dump_info (REPORT_DETAILS)) 567 fprintf (vect_dump, "Unknown def-use cycle pattern."); 568 569 } 570 571 return; 572} 573 574 575/* Function vect_analyze_data_ref_dependence. 576 577 Return TRUE if there (might) exist a dependence between a memory-reference 578 DRA and a memory-reference DRB. */ 579 580static bool 581vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, 582 loop_vec_info loop_vinfo) 583{ 584 unsigned int i; 585 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 586 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 587 struct data_reference *dra = DDR_A (ddr); 588 struct data_reference *drb = DDR_B (ddr); 589 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 590 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 591 lambda_vector dist_v; 592 unsigned int loop_depth; 593 594 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 595 return false; 596 597 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 598 { 599 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 600 { 601 fprintf (vect_dump, 602 "not vectorized: can't determine dependence between "); 603 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 604 fprintf (vect_dump, " and "); 605 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 606 } 607 return true; 608 } 609 610 if (DDR_NUM_DIST_VECTS (ddr) == 0) 611 { 612 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 613 { 614 fprintf (vect_dump, "not vectorized: bad dist vector for "); 615 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 616 fprintf (vect_dump, " and "); 617 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 618 } 619 return true; 620 } 621 622 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); 623 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++) 624 { 625 int dist = dist_v[loop_depth]; 626 627 if (vect_print_dump_info (REPORT_DR_DETAILS)) 628 fprintf (vect_dump, "dependence distance = %d.", dist); 629 630 /* Same loop iteration. */ 631 if (dist % vectorization_factor == 0) 632 { 633 /* Two references with distance zero have the same alignment. */ 634 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb); 635 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra); 636 if (vect_print_dump_info (REPORT_ALIGNMENT)) 637 fprintf (vect_dump, "accesses have the same alignment."); 638 if (vect_print_dump_info (REPORT_DR_DETAILS)) 639 { 640 fprintf (vect_dump, "dependence distance modulo vf == 0 between "); 641 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 642 fprintf (vect_dump, " and "); 643 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 644 } 645 continue; 646 } 647 648 if (abs (dist) >= vectorization_factor) 649 { 650 /* Dependence distance does not create dependence, as far as vectorization 651 is concerned, in this case. */ 652 if (vect_print_dump_info (REPORT_DR_DETAILS)) 653 fprintf (vect_dump, "dependence distance >= VF."); 654 continue; 655 } 656 657 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 658 { 659 fprintf (vect_dump, 660 "not vectorized: possible dependence between data-refs "); 661 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 662 fprintf (vect_dump, " and "); 663 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 664 } 665 666 return true; 667 } 668 669 return false; 670} 671 672 673/* Function vect_analyze_data_ref_dependences. 674 675 Examine all the data references in the loop, and make sure there do not 676 exist any data dependences between them. */ 677 678static bool 679vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) 680{ 681 unsigned int i; 682 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo); 683 struct data_dependence_relation *ddr; 684 685 if (vect_print_dump_info (REPORT_DETAILS)) 686 fprintf (vect_dump, "=== vect_analyze_dependences ==="); 687 688 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++) 689 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo)) 690 return false; 691 692 return true; 693} 694 695 696/* Function vect_compute_data_ref_alignment 697 698 Compute the misalignment of the data reference DR. 699 700 Output: 701 1. If during the misalignment computation it is found that the data reference 702 cannot be vectorized then false is returned. 703 2. DR_MISALIGNMENT (DR) is defined. 704 705 FOR NOW: No analysis is actually performed. Misalignment is calculated 706 only for trivial cases. TODO. */ 707 708static bool 709vect_compute_data_ref_alignment (struct data_reference *dr) 710{ 711 tree stmt = DR_STMT (dr); 712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 713 tree ref = DR_REF (dr); 714 tree vectype; 715 tree base, base_addr; 716 bool base_aligned; 717 tree misalign; 718 tree aligned_to, alignment; 719 720 if (vect_print_dump_info (REPORT_DETAILS)) 721 fprintf (vect_dump, "vect_compute_data_ref_alignment:"); 722 723 /* Initialize misalignment to unknown. */ 724 DR_MISALIGNMENT (dr) = -1; 725 726 misalign = DR_OFFSET_MISALIGNMENT (dr); 727 aligned_to = DR_ALIGNED_TO (dr); 728 base_addr = DR_BASE_ADDRESS (dr); 729 base = build_fold_indirect_ref (base_addr); 730 vectype = STMT_VINFO_VECTYPE (stmt_info); 731 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); 732 733 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0) 734 || !misalign) 735 { 736 if (vect_print_dump_info (REPORT_DETAILS)) 737 { 738 fprintf (vect_dump, "Unknown alignment for access: "); 739 print_generic_expr (vect_dump, base, TDF_SLIM); 740 } 741 return true; 742 } 743 744 if ((DECL_P (base) 745 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)), 746 alignment) >= 0) 747 || (TREE_CODE (base_addr) == SSA_NAME 748 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE ( 749 TREE_TYPE (base_addr)))), 750 alignment) >= 0)) 751 base_aligned = true; 752 else 753 base_aligned = false; 754 755 if (!base_aligned) 756 { 757 /* Do not change the alignment of global variables if 758 flag_section_anchors is enabled. */ 759 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)) 760 || (TREE_STATIC (base) && flag_section_anchors)) 761 { 762 if (vect_print_dump_info (REPORT_DETAILS)) 763 { 764 fprintf (vect_dump, "can't force alignment of ref: "); 765 print_generic_expr (vect_dump, ref, TDF_SLIM); 766 } 767 return true; 768 } 769 770 /* Force the alignment of the decl. 771 NOTE: This is the only change to the code we make during 772 the analysis phase, before deciding to vectorize the loop. */ 773 if (vect_print_dump_info (REPORT_DETAILS)) 774 fprintf (vect_dump, "force alignment"); 775 DECL_ALIGN (base) = TYPE_ALIGN (vectype); 776 DECL_USER_ALIGN (base) = 1; 777 } 778 779 /* At this point we assume that the base is aligned. */ 780 gcc_assert (base_aligned 781 || (TREE_CODE (base) == VAR_DECL 782 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); 783 784 /* Modulo alignment. */ 785 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment); 786 787 if (!host_integerp (misalign, 1)) 788 { 789 /* Negative or overflowed misalignment value. */ 790 if (vect_print_dump_info (REPORT_DETAILS)) 791 fprintf (vect_dump, "unexpected misalign value"); 792 return false; 793 } 794 795 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign); 796 797 if (vect_print_dump_info (REPORT_DETAILS)) 798 { 799 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr)); 800 print_generic_expr (vect_dump, ref, TDF_SLIM); 801 } 802 803 return true; 804} 805 806 807/* Function vect_compute_data_refs_alignment 808 809 Compute the misalignment of data references in the loop. 810 Return FALSE if a data reference is found that cannot be vectorized. */ 811 812static bool 813vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) 814{ 815 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 816 struct data_reference *dr; 817 unsigned int i; 818 819 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 820 if (!vect_compute_data_ref_alignment (dr)) 821 return false; 822 823 return true; 824} 825 826 827/* Function vect_update_misalignment_for_peel 828 829 DR - the data reference whose misalignment is to be adjusted. 830 DR_PEEL - the data reference whose misalignment is being made 831 zero in the vector loop by the peel. 832 NPEEL - the number of iterations in the peel loop if the misalignment 833 of DR_PEEL is known at compile time. */ 834 835static void 836vect_update_misalignment_for_peel (struct data_reference *dr, 837 struct data_reference *dr_peel, int npeel) 838{ 839 unsigned int i; 840 int drsize; 841 VEC(dr_p,heap) *same_align_drs; 842 struct data_reference *current_dr; 843 844 if (known_alignment_for_access_p (dr) 845 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel)) 846 { 847 DR_MISALIGNMENT (dr) = 0; 848 return; 849 } 850 851 /* It can be assumed that the data refs with the same alignment as dr_peel 852 are aligned in the vector loop. */ 853 same_align_drs 854 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel))); 855 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++) 856 { 857 if (current_dr != dr) 858 continue; 859 gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel)); 860 DR_MISALIGNMENT (dr) = 0; 861 return; 862 } 863 864 if (known_alignment_for_access_p (dr) 865 && known_alignment_for_access_p (dr_peel)) 866 { 867 drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); 868 DR_MISALIGNMENT (dr) += npeel * drsize; 869 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD; 870 return; 871 } 872 873 DR_MISALIGNMENT (dr) = -1; 874} 875 876 877/* Function vect_verify_datarefs_alignment 878 879 Return TRUE if all data references in the loop can be 880 handled with respect to alignment. */ 881 882static bool 883vect_verify_datarefs_alignment (loop_vec_info loop_vinfo) 884{ 885 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 886 struct data_reference *dr; 887 enum dr_alignment_support supportable_dr_alignment; 888 unsigned int i; 889 890 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 891 { 892 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 893 if (!supportable_dr_alignment) 894 { 895 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 896 { 897 if (DR_IS_READ (dr)) 898 fprintf (vect_dump, 899 "not vectorized: unsupported unaligned load."); 900 else 901 fprintf (vect_dump, 902 "not vectorized: unsupported unaligned store."); 903 } 904 return false; 905 } 906 if (supportable_dr_alignment != dr_aligned 907 && vect_print_dump_info (REPORT_ALIGNMENT)) 908 fprintf (vect_dump, "Vectorizing an unaligned access."); 909 } 910 return true; 911} 912 913
| 29#include "basic-block.h" 30#include "diagnostic.h" 31#include "tree-flow.h" 32#include "tree-dump.h" 33#include "timevar.h" 34#include "cfgloop.h" 35#include "expr.h" 36#include "optabs.h" 37#include "params.h" 38#include "tree-chrec.h" 39#include "tree-data-ref.h" 40#include "tree-scalar-evolution.h" 41#include "tree-vectorizer.h" 42 43/* Main analysis functions. */ 44static loop_vec_info vect_analyze_loop_form (struct loop *); 45static bool vect_analyze_data_refs (loop_vec_info); 46static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); 47static void vect_analyze_scalar_cycles (loop_vec_info); 48static bool vect_analyze_data_ref_accesses (loop_vec_info); 49static bool vect_analyze_data_ref_dependences (loop_vec_info); 50static bool vect_analyze_data_refs_alignment (loop_vec_info); 51static bool vect_compute_data_refs_alignment (loop_vec_info); 52static bool vect_enhance_data_refs_alignment (loop_vec_info); 53static bool vect_analyze_operations (loop_vec_info); 54static bool vect_determine_vectorization_factor (loop_vec_info); 55 56/* Utility functions for the analyses. */ 57static bool exist_non_indexing_operands_for_use_p (tree, tree); 58static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool); 59static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *); 60static tree vect_get_loop_niters (struct loop *, tree *); 61static bool vect_analyze_data_ref_dependence 62 (struct data_dependence_relation *, loop_vec_info); 63static bool vect_compute_data_ref_alignment (struct data_reference *); 64static bool vect_analyze_data_ref_access (struct data_reference *); 65static bool vect_can_advance_ivs_p (loop_vec_info); 66static void vect_update_misalignment_for_peel 67 (struct data_reference *, struct data_reference *, int npeel); 68 69 70/* Function vect_determine_vectorization_factor 71 72 Determine the vectorization factor (VF). VF is the number of data elements 73 that are operated upon in parallel in a single iteration of the vectorized 74 loop. For example, when vectorizing a loop that operates on 4byte elements, 75 on a target with vector size (VS) 16byte, the VF is set to 4, since 4 76 elements can fit in a single vector register. 77 78 We currently support vectorization of loops in which all types operated upon 79 are of the same size. Therefore this function currently sets VF according to 80 the size of the types operated upon, and fails if there are multiple sizes 81 in the loop. 82 83 VF is also the factor by which the loop iterations are strip-mined, e.g.: 84 original loop: 85 for (i=0; i<N; i++){ 86 a[i] = b[i] + c[i]; 87 } 88 89 vectorized loop: 90 for (i=0; i<N; i+=VF){ 91 a[i:VF] = b[i:VF] + c[i:VF]; 92 } 93*/ 94 95static bool 96vect_determine_vectorization_factor (loop_vec_info loop_vinfo) 97{ 98 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 99 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 100 int nbbs = loop->num_nodes; 101 block_stmt_iterator si; 102 unsigned int vectorization_factor = 0; 103 int i; 104 tree scalar_type; 105 106 if (vect_print_dump_info (REPORT_DETAILS)) 107 fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); 108 109 for (i = 0; i < nbbs; i++) 110 { 111 basic_block bb = bbs[i]; 112 113 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 114 { 115 tree stmt = bsi_stmt (si); 116 unsigned int nunits; 117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 118 tree vectype; 119 120 if (vect_print_dump_info (REPORT_DETAILS)) 121 { 122 fprintf (vect_dump, "==> examining statement: "); 123 print_generic_expr (vect_dump, stmt, TDF_SLIM); 124 } 125 126 gcc_assert (stmt_info); 127 /* skip stmts which do not need to be vectorized. */ 128 if (!STMT_VINFO_RELEVANT_P (stmt_info) 129 && !STMT_VINFO_LIVE_P (stmt_info)) 130 { 131 if (vect_print_dump_info (REPORT_DETAILS)) 132 fprintf (vect_dump, "skip."); 133 continue; 134 } 135 136 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) 137 { 138 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 139 { 140 fprintf (vect_dump, "not vectorized: vector stmt in loop:"); 141 print_generic_expr (vect_dump, stmt, TDF_SLIM); 142 } 143 return false; 144 } 145 146 if (STMT_VINFO_VECTYPE (stmt_info)) 147 { 148 vectype = STMT_VINFO_VECTYPE (stmt_info); 149 scalar_type = TREE_TYPE (vectype); 150 } 151 else 152 { 153 if (STMT_VINFO_DATA_REF (stmt_info)) 154 scalar_type = 155 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); 156 else if (TREE_CODE (stmt) == MODIFY_EXPR) 157 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); 158 else 159 scalar_type = TREE_TYPE (stmt); 160 161 if (vect_print_dump_info (REPORT_DETAILS)) 162 { 163 fprintf (vect_dump, "get vectype for scalar type: "); 164 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 165 } 166 167 vectype = get_vectype_for_scalar_type (scalar_type); 168 if (!vectype) 169 { 170 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 171 { 172 fprintf (vect_dump, 173 "not vectorized: unsupported data-type "); 174 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 175 } 176 return false; 177 } 178 STMT_VINFO_VECTYPE (stmt_info) = vectype; 179 } 180 181 if (vect_print_dump_info (REPORT_DETAILS)) 182 { 183 fprintf (vect_dump, "vectype: "); 184 print_generic_expr (vect_dump, vectype, TDF_SLIM); 185 } 186 187 nunits = TYPE_VECTOR_SUBPARTS (vectype); 188 if (vect_print_dump_info (REPORT_DETAILS)) 189 fprintf (vect_dump, "nunits = %d", nunits); 190 191 if (vectorization_factor) 192 { 193 /* FORNOW: don't allow mixed units. 194 This restriction will be relaxed in the future. */ 195 if (nunits != vectorization_factor) 196 { 197 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 198 fprintf (vect_dump, "not vectorized: mixed data-types"); 199 return false; 200 } 201 } 202 else 203 vectorization_factor = nunits; 204 205 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) 206 * vectorization_factor == UNITS_PER_SIMD_WORD); 207 } 208 } 209 210 /* TODO: Analyze cost. Decide if worth while to vectorize. */ 211 212 if (vectorization_factor <= 1) 213 { 214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 215 fprintf (vect_dump, "not vectorized: unsupported data-type"); 216 return false; 217 } 218 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; 219 220 return true; 221} 222 223 224/* Function vect_analyze_operations. 225 226 Scan the loop stmts and make sure they are all vectorizable. */ 227 228static bool 229vect_analyze_operations (loop_vec_info loop_vinfo) 230{ 231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 232 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 233 int nbbs = loop->num_nodes; 234 block_stmt_iterator si; 235 unsigned int vectorization_factor = 0; 236 int i; 237 bool ok; 238 tree phi; 239 stmt_vec_info stmt_info; 240 bool need_to_vectorize = false; 241 242 if (vect_print_dump_info (REPORT_DETAILS)) 243 fprintf (vect_dump, "=== vect_analyze_operations ==="); 244 245 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); 246 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 247 248 for (i = 0; i < nbbs; i++) 249 { 250 basic_block bb = bbs[i]; 251 252 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 253 { 254 stmt_info = vinfo_for_stmt (phi); 255 if (vect_print_dump_info (REPORT_DETAILS)) 256 { 257 fprintf (vect_dump, "examining phi: "); 258 print_generic_expr (vect_dump, phi, TDF_SLIM); 259 } 260 261 gcc_assert (stmt_info); 262 263 if (STMT_VINFO_LIVE_P (stmt_info)) 264 { 265 /* FORNOW: not yet supported. */ 266 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 267 fprintf (vect_dump, "not vectorized: value used after loop."); 268 return false; 269 } 270 271 if (STMT_VINFO_RELEVANT_P (stmt_info)) 272 { 273 /* Most likely a reduction-like computation that is used 274 in the loop. */ 275 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 276 fprintf (vect_dump, "not vectorized: unsupported pattern."); 277 return false; 278 } 279 } 280 281 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 282 { 283 tree stmt = bsi_stmt (si); 284 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 285 286 if (vect_print_dump_info (REPORT_DETAILS)) 287 { 288 fprintf (vect_dump, "==> examining statement: "); 289 print_generic_expr (vect_dump, stmt, TDF_SLIM); 290 } 291 292 gcc_assert (stmt_info); 293 294 /* skip stmts which do not need to be vectorized. 295 this is expected to include: 296 - the COND_EXPR which is the loop exit condition 297 - any LABEL_EXPRs in the loop 298 - computations that are used only for array indexing or loop 299 control */ 300 301 if (!STMT_VINFO_RELEVANT_P (stmt_info) 302 && !STMT_VINFO_LIVE_P (stmt_info)) 303 { 304 if (vect_print_dump_info (REPORT_DETAILS)) 305 fprintf (vect_dump, "irrelevant."); 306 continue; 307 } 308 309 if (STMT_VINFO_RELEVANT_P (stmt_info)) 310 { 311 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))); 312 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); 313 314 ok = (vectorizable_operation (stmt, NULL, NULL) 315 || vectorizable_assignment (stmt, NULL, NULL) 316 || vectorizable_load (stmt, NULL, NULL) 317 || vectorizable_store (stmt, NULL, NULL) 318 || vectorizable_condition (stmt, NULL, NULL)); 319 320 if (!ok) 321 { 322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 323 { 324 fprintf (vect_dump, 325 "not vectorized: relevant stmt not supported: "); 326 print_generic_expr (vect_dump, stmt, TDF_SLIM); 327 } 328 return false; 329 } 330 need_to_vectorize = true; 331 } 332 333 if (STMT_VINFO_LIVE_P (stmt_info)) 334 { 335 ok = vectorizable_reduction (stmt, NULL, NULL); 336 337 if (ok) 338 need_to_vectorize = true; 339 else 340 ok = vectorizable_live_operation (stmt, NULL, NULL); 341 342 if (!ok) 343 { 344 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 345 { 346 fprintf (vect_dump, 347 "not vectorized: live stmt not supported: "); 348 print_generic_expr (vect_dump, stmt, TDF_SLIM); 349 } 350 return false; 351 } 352 } 353 } /* stmts in bb */ 354 } /* bbs */ 355 356 /* TODO: Analyze cost. Decide if worth while to vectorize. */ 357 358 /* All operations in the loop are either irrelevant (deal with loop 359 control, or dead), or only used outside the loop and can be moved 360 out of the loop (e.g. invariants, inductions). The loop can be 361 optimized away by scalar optimizations. We're better off not 362 touching this loop. */ 363 if (!need_to_vectorize) 364 { 365 if (vect_print_dump_info (REPORT_DETAILS)) 366 fprintf (vect_dump, 367 "All the computation can be taken out of the loop."); 368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 369 fprintf (vect_dump, 370 "not vectorized: redundant loop. no profit to vectorize."); 371 return false; 372 } 373 374 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 375 && vect_print_dump_info (REPORT_DETAILS)) 376 fprintf (vect_dump, 377 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, 378 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo)); 379 380 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 381 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor) 382 { 383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 384 fprintf (vect_dump, "not vectorized: iteration count too small."); 385 return false; 386 } 387 388 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 389 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0 390 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) 391 { 392 if (vect_print_dump_info (REPORT_DETAILS)) 393 fprintf (vect_dump, "epilog loop required."); 394 if (!vect_can_advance_ivs_p (loop_vinfo)) 395 { 396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 397 fprintf (vect_dump, 398 "not vectorized: can't create epilog loop 1."); 399 return false; 400 } 401 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit)) 402 { 403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 404 fprintf (vect_dump, 405 "not vectorized: can't create epilog loop 2."); 406 return false; 407 } 408 } 409 410 return true; 411} 412 413 414/* Function exist_non_indexing_operands_for_use_p 415 416 USE is one of the uses attached to STMT. Check if USE is 417 used in STMT for anything other than indexing an array. */ 418 419static bool 420exist_non_indexing_operands_for_use_p (tree use, tree stmt) 421{ 422 tree operand; 423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 424 425 /* USE corresponds to some operand in STMT. If there is no data 426 reference in STMT, then any operand that corresponds to USE 427 is not indexing an array. */ 428 if (!STMT_VINFO_DATA_REF (stmt_info)) 429 return true; 430 431 /* STMT has a data_ref. FORNOW this means that its of one of 432 the following forms: 433 -1- ARRAY_REF = var 434 -2- var = ARRAY_REF 435 (This should have been verified in analyze_data_refs). 436 437 'var' in the second case corresponds to a def, not a use, 438 so USE cannot correspond to any operands that are not used 439 for array indexing. 440 441 Therefore, all we need to check is if STMT falls into the 442 first case, and whether var corresponds to USE. */ 443 444 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) 445 return false; 446 447 operand = TREE_OPERAND (stmt, 1); 448 449 if (TREE_CODE (operand) != SSA_NAME) 450 return false; 451 452 if (operand == use) 453 return true; 454 455 return false; 456} 457 458 459/* Function vect_analyze_scalar_cycles. 460 461 Examine the cross iteration def-use cycles of scalar variables, by 462 analyzing the loop (scalar) PHIs; Classify each cycle as one of the 463 following: invariant, induction, reduction, unknown. 464 465 Some forms of scalar cycles are not yet supported. 466 467 Example1: reduction: (unsupported yet) 468 469 loop1: 470 for (i=0; i<N; i++) 471 sum += a[i]; 472 473 Example2: induction: (unsupported yet) 474 475 loop2: 476 for (i=0; i<N; i++) 477 a[i] = i; 478 479 Note: the following loop *is* vectorizable: 480 481 loop3: 482 for (i=0; i<N; i++) 483 a[i] = b[i]; 484 485 even though it has a def-use cycle caused by the induction variable i: 486 487 loop: i_2 = PHI (i_0, i_1) 488 a[i_2] = ...; 489 i_1 = i_2 + 1; 490 GOTO loop; 491 492 because the def-use cycle in loop3 is considered "not relevant" - i.e., 493 it does not need to be vectorized because it is only used for array 494 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in 495 loop2 on the other hand is relevant (it is being written to memory). 496*/ 497 498static void 499vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) 500{ 501 tree phi; 502 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 503 basic_block bb = loop->header; 504 tree dummy; 505 506 if (vect_print_dump_info (REPORT_DETAILS)) 507 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); 508 509 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 510 { 511 tree access_fn = NULL; 512 tree def = PHI_RESULT (phi); 513 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi); 514 tree reduc_stmt; 515 516 if (vect_print_dump_info (REPORT_DETAILS)) 517 { 518 fprintf (vect_dump, "Analyze phi: "); 519 print_generic_expr (vect_dump, phi, TDF_SLIM); 520 } 521 522 /* Skip virtual phi's. The data dependences that are associated with 523 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ 524 525 if (!is_gimple_reg (SSA_NAME_VAR (def))) 526 { 527 if (vect_print_dump_info (REPORT_DETAILS)) 528 fprintf (vect_dump, "virtual phi. skip."); 529 continue; 530 } 531 532 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type; 533 534 /* Analyze the evolution function. */ 535 536 access_fn = analyze_scalar_evolution (loop, def); 537 538 if (!access_fn) 539 continue; 540 541 if (vect_print_dump_info (REPORT_DETAILS)) 542 { 543 fprintf (vect_dump, "Access function of PHI: "); 544 print_generic_expr (vect_dump, access_fn, TDF_SLIM); 545 } 546 547 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy)) 548 { 549 if (vect_print_dump_info (REPORT_DETAILS)) 550 fprintf (vect_dump, "Detected induction."); 551 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; 552 continue; 553 } 554 555 /* TODO: handle invariant phis */ 556 557 reduc_stmt = vect_is_simple_reduction (loop, phi); 558 if (reduc_stmt) 559 { 560 if (vect_print_dump_info (REPORT_DETAILS)) 561 fprintf (vect_dump, "Detected reduction."); 562 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; 563 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = 564 vect_reduction_def; 565 } 566 else 567 if (vect_print_dump_info (REPORT_DETAILS)) 568 fprintf (vect_dump, "Unknown def-use cycle pattern."); 569 570 } 571 572 return; 573} 574 575 576/* Function vect_analyze_data_ref_dependence. 577 578 Return TRUE if there (might) exist a dependence between a memory-reference 579 DRA and a memory-reference DRB. */ 580 581static bool 582vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, 583 loop_vec_info loop_vinfo) 584{ 585 unsigned int i; 586 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 587 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 588 struct data_reference *dra = DDR_A (ddr); 589 struct data_reference *drb = DDR_B (ddr); 590 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 591 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 592 lambda_vector dist_v; 593 unsigned int loop_depth; 594 595 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 596 return false; 597 598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 599 { 600 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 601 { 602 fprintf (vect_dump, 603 "not vectorized: can't determine dependence between "); 604 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 605 fprintf (vect_dump, " and "); 606 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 607 } 608 return true; 609 } 610 611 if (DDR_NUM_DIST_VECTS (ddr) == 0) 612 { 613 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 614 { 615 fprintf (vect_dump, "not vectorized: bad dist vector for "); 616 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 617 fprintf (vect_dump, " and "); 618 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 619 } 620 return true; 621 } 622 623 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); 624 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++) 625 { 626 int dist = dist_v[loop_depth]; 627 628 if (vect_print_dump_info (REPORT_DR_DETAILS)) 629 fprintf (vect_dump, "dependence distance = %d.", dist); 630 631 /* Same loop iteration. */ 632 if (dist % vectorization_factor == 0) 633 { 634 /* Two references with distance zero have the same alignment. */ 635 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb); 636 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra); 637 if (vect_print_dump_info (REPORT_ALIGNMENT)) 638 fprintf (vect_dump, "accesses have the same alignment."); 639 if (vect_print_dump_info (REPORT_DR_DETAILS)) 640 { 641 fprintf (vect_dump, "dependence distance modulo vf == 0 between "); 642 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 643 fprintf (vect_dump, " and "); 644 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 645 } 646 continue; 647 } 648 649 if (abs (dist) >= vectorization_factor) 650 { 651 /* Dependence distance does not create dependence, as far as vectorization 652 is concerned, in this case. */ 653 if (vect_print_dump_info (REPORT_DR_DETAILS)) 654 fprintf (vect_dump, "dependence distance >= VF."); 655 continue; 656 } 657 658 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 659 { 660 fprintf (vect_dump, 661 "not vectorized: possible dependence between data-refs "); 662 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 663 fprintf (vect_dump, " and "); 664 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 665 } 666 667 return true; 668 } 669 670 return false; 671} 672 673 674/* Function vect_analyze_data_ref_dependences. 675 676 Examine all the data references in the loop, and make sure there do not 677 exist any data dependences between them. */ 678 679static bool 680vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) 681{ 682 unsigned int i; 683 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo); 684 struct data_dependence_relation *ddr; 685 686 if (vect_print_dump_info (REPORT_DETAILS)) 687 fprintf (vect_dump, "=== vect_analyze_dependences ==="); 688 689 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++) 690 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo)) 691 return false; 692 693 return true; 694} 695 696 697/* Function vect_compute_data_ref_alignment 698 699 Compute the misalignment of the data reference DR. 700 701 Output: 702 1. If during the misalignment computation it is found that the data reference 703 cannot be vectorized then false is returned. 704 2. DR_MISALIGNMENT (DR) is defined. 705 706 FOR NOW: No analysis is actually performed. Misalignment is calculated 707 only for trivial cases. TODO. */ 708 709static bool 710vect_compute_data_ref_alignment (struct data_reference *dr) 711{ 712 tree stmt = DR_STMT (dr); 713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 714 tree ref = DR_REF (dr); 715 tree vectype; 716 tree base, base_addr; 717 bool base_aligned; 718 tree misalign; 719 tree aligned_to, alignment; 720 721 if (vect_print_dump_info (REPORT_DETAILS)) 722 fprintf (vect_dump, "vect_compute_data_ref_alignment:"); 723 724 /* Initialize misalignment to unknown. */ 725 DR_MISALIGNMENT (dr) = -1; 726 727 misalign = DR_OFFSET_MISALIGNMENT (dr); 728 aligned_to = DR_ALIGNED_TO (dr); 729 base_addr = DR_BASE_ADDRESS (dr); 730 base = build_fold_indirect_ref (base_addr); 731 vectype = STMT_VINFO_VECTYPE (stmt_info); 732 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); 733 734 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0) 735 || !misalign) 736 { 737 if (vect_print_dump_info (REPORT_DETAILS)) 738 { 739 fprintf (vect_dump, "Unknown alignment for access: "); 740 print_generic_expr (vect_dump, base, TDF_SLIM); 741 } 742 return true; 743 } 744 745 if ((DECL_P (base) 746 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)), 747 alignment) >= 0) 748 || (TREE_CODE (base_addr) == SSA_NAME 749 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE ( 750 TREE_TYPE (base_addr)))), 751 alignment) >= 0)) 752 base_aligned = true; 753 else 754 base_aligned = false; 755 756 if (!base_aligned) 757 { 758 /* Do not change the alignment of global variables if 759 flag_section_anchors is enabled. */ 760 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)) 761 || (TREE_STATIC (base) && flag_section_anchors)) 762 { 763 if (vect_print_dump_info (REPORT_DETAILS)) 764 { 765 fprintf (vect_dump, "can't force alignment of ref: "); 766 print_generic_expr (vect_dump, ref, TDF_SLIM); 767 } 768 return true; 769 } 770 771 /* Force the alignment of the decl. 772 NOTE: This is the only change to the code we make during 773 the analysis phase, before deciding to vectorize the loop. */ 774 if (vect_print_dump_info (REPORT_DETAILS)) 775 fprintf (vect_dump, "force alignment"); 776 DECL_ALIGN (base) = TYPE_ALIGN (vectype); 777 DECL_USER_ALIGN (base) = 1; 778 } 779 780 /* At this point we assume that the base is aligned. */ 781 gcc_assert (base_aligned 782 || (TREE_CODE (base) == VAR_DECL 783 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); 784 785 /* Modulo alignment. */ 786 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment); 787 788 if (!host_integerp (misalign, 1)) 789 { 790 /* Negative or overflowed misalignment value. */ 791 if (vect_print_dump_info (REPORT_DETAILS)) 792 fprintf (vect_dump, "unexpected misalign value"); 793 return false; 794 } 795 796 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign); 797 798 if (vect_print_dump_info (REPORT_DETAILS)) 799 { 800 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr)); 801 print_generic_expr (vect_dump, ref, TDF_SLIM); 802 } 803 804 return true; 805} 806 807 808/* Function vect_compute_data_refs_alignment 809 810 Compute the misalignment of data references in the loop. 811 Return FALSE if a data reference is found that cannot be vectorized. */ 812 813static bool 814vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) 815{ 816 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 817 struct data_reference *dr; 818 unsigned int i; 819 820 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 821 if (!vect_compute_data_ref_alignment (dr)) 822 return false; 823 824 return true; 825} 826 827 828/* Function vect_update_misalignment_for_peel 829 830 DR - the data reference whose misalignment is to be adjusted. 831 DR_PEEL - the data reference whose misalignment is being made 832 zero in the vector loop by the peel. 833 NPEEL - the number of iterations in the peel loop if the misalignment 834 of DR_PEEL is known at compile time. */ 835 836static void 837vect_update_misalignment_for_peel (struct data_reference *dr, 838 struct data_reference *dr_peel, int npeel) 839{ 840 unsigned int i; 841 int drsize; 842 VEC(dr_p,heap) *same_align_drs; 843 struct data_reference *current_dr; 844 845 if (known_alignment_for_access_p (dr) 846 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel)) 847 { 848 DR_MISALIGNMENT (dr) = 0; 849 return; 850 } 851 852 /* It can be assumed that the data refs with the same alignment as dr_peel 853 are aligned in the vector loop. */ 854 same_align_drs 855 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel))); 856 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++) 857 { 858 if (current_dr != dr) 859 continue; 860 gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel)); 861 DR_MISALIGNMENT (dr) = 0; 862 return; 863 } 864 865 if (known_alignment_for_access_p (dr) 866 && known_alignment_for_access_p (dr_peel)) 867 { 868 drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); 869 DR_MISALIGNMENT (dr) += npeel * drsize; 870 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD; 871 return; 872 } 873 874 DR_MISALIGNMENT (dr) = -1; 875} 876 877 878/* Function vect_verify_datarefs_alignment 879 880 Return TRUE if all data references in the loop can be 881 handled with respect to alignment. */ 882 883static bool 884vect_verify_datarefs_alignment (loop_vec_info loop_vinfo) 885{ 886 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 887 struct data_reference *dr; 888 enum dr_alignment_support supportable_dr_alignment; 889 unsigned int i; 890 891 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 892 { 893 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 894 if (!supportable_dr_alignment) 895 { 896 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 897 { 898 if (DR_IS_READ (dr)) 899 fprintf (vect_dump, 900 "not vectorized: unsupported unaligned load."); 901 else 902 fprintf (vect_dump, 903 "not vectorized: unsupported unaligned store."); 904 } 905 return false; 906 } 907 if (supportable_dr_alignment != dr_aligned 908 && vect_print_dump_info (REPORT_ALIGNMENT)) 909 fprintf (vect_dump, "Vectorizing an unaligned access."); 910 } 911 return true; 912} 913 914
|
| 915/* Function vector_alignment_reachable_p 916 917 Return true if vector alignment for DR is reachable by peeling 918 a few loop iterations. Return false otherwise. */ 919 920static bool 921vector_alignment_reachable_p (struct data_reference *dr) 922{ 923 tree stmt = DR_STMT (dr); 924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 925 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 926 927 /* If misalignment is known at the compile time then allow peeling 928 only if natural alignment is reachable through peeling. */ 929 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr)) 930 { 931 HOST_WIDE_INT elmsize = 932 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 933 if (vect_print_dump_info (REPORT_DETAILS)) 934 { 935 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize); 936 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr)); 937 } 938 if (DR_MISALIGNMENT (dr) % elmsize) 939 { 940 if (vect_print_dump_info (REPORT_DETAILS)) 941 fprintf (vect_dump, "data size does not divide the misalignment.\n"); 942 return false; 943 } 944 } 945 946 if (!known_alignment_for_access_p (dr)) 947 { 948 tree type = (TREE_TYPE (DR_REF (dr))); 949 tree ba = DR_BASE_OBJECT (dr); 950 bool is_packed = false; 951 952 if (ba) 953 is_packed = contains_packed_reference (ba); 954 955 if (vect_print_dump_info (REPORT_DETAILS)) 956 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed); 957 if (targetm.vectorize.vector_alignment_reachable (type, is_packed)) 958 return true; 959 else 960 return false; 961 } 962 963 return true; 964} 965
|
914/* Function vect_enhance_data_refs_alignment 915 916 This pass will use loop versioning and loop peeling in order to enhance 917 the alignment of data references in the loop. 918 919 FOR NOW: we assume that whatever versioning/peeling takes place, only the 920 original loop is to be vectorized; Any other loops that are created by 921 the transformations performed in this pass - are not supposed to be 922 vectorized. This restriction will be relaxed. 923 924 This pass will require a cost model to guide it whether to apply peeling 925 or versioning or a combination of the two. For example, the scheme that 926 intel uses when given a loop with several memory accesses, is as follows: 927 choose one memory access ('p') which alignment you want to force by doing 928 peeling. Then, either (1) generate a loop in which 'p' is aligned and all 929 other accesses are not necessarily aligned, or (2) use loop versioning to 930 generate one loop in which all accesses are aligned, and another loop in 931 which only 'p' is necessarily aligned. 932 933 ("Automatic Intra-Register Vectorization for the Intel Architecture", 934 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International 935 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) 936 937 Devising a cost model is the most critical aspect of this work. It will 938 guide us on which access to peel for, whether to use loop versioning, how 939 many versions to create, etc. The cost model will probably consist of 940 generic considerations as well as target specific considerations (on 941 powerpc for example, misaligned stores are more painful than misaligned 942 loads). 943 944 Here are the general steps involved in alignment enhancements: 945 946 -- original loop, before alignment analysis: 947 for (i=0; i<N; i++){ 948 x = q[i]; # DR_MISALIGNMENT(q) = unknown 949 p[i] = y; # DR_MISALIGNMENT(p) = unknown 950 } 951 952 -- After vect_compute_data_refs_alignment: 953 for (i=0; i<N; i++){ 954 x = q[i]; # DR_MISALIGNMENT(q) = 3 955 p[i] = y; # DR_MISALIGNMENT(p) = unknown 956 } 957 958 -- Possibility 1: we do loop versioning: 959 if (p is aligned) { 960 for (i=0; i<N; i++){ # loop 1A 961 x = q[i]; # DR_MISALIGNMENT(q) = 3 962 p[i] = y; # DR_MISALIGNMENT(p) = 0 963 } 964 } 965 else { 966 for (i=0; i<N; i++){ # loop 1B 967 x = q[i]; # DR_MISALIGNMENT(q) = 3 968 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 969 } 970 } 971 972 -- Possibility 2: we do loop peeling: 973 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 974 x = q[i]; 975 p[i] = y; 976 } 977 for (i = 3; i < N; i++){ # loop 2A 978 x = q[i]; # DR_MISALIGNMENT(q) = 0 979 p[i] = y; # DR_MISALIGNMENT(p) = unknown 980 } 981 982 -- Possibility 3: combination of loop peeling and versioning: 983 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 984 x = q[i]; 985 p[i] = y; 986 } 987 if (p is aligned) { 988 for (i = 3; i<N; i++){ # loop 3A 989 x = q[i]; # DR_MISALIGNMENT(q) = 0 990 p[i] = y; # DR_MISALIGNMENT(p) = 0 991 } 992 } 993 else { 994 for (i = 3; i<N; i++){ # loop 3B 995 x = q[i]; # DR_MISALIGNMENT(q) = 0 996 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 997 } 998 } 999 1000 These loops are later passed to loop_transform to be vectorized. The 1001 vectorizer will use the alignment information to guide the transformation 1002 (whether to generate regular loads/stores, or with special handling for 1003 misalignment). */ 1004 1005static bool 1006vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 1007{ 1008 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1009 enum dr_alignment_support supportable_dr_alignment; 1010 struct data_reference *dr0 = NULL; 1011 struct data_reference *dr; 1012 unsigned int i; 1013 bool do_peeling = false; 1014 bool do_versioning = false; 1015 bool stat; 1016 1017 /* While cost model enhancements are expected in the future, the high level 1018 view of the code at this time is as follows: 1019 1020 A) If there is a misaligned write then see if peeling to align this write 1021 can make all data references satisfy vect_supportable_dr_alignment. 1022 If so, update data structures as needed and return true. Note that 1023 at this time vect_supportable_dr_alignment is known to return false 1024 for a a misaligned write. 1025 1026 B) If peeling wasn't possible and there is a data reference with an 1027 unknown misalignment that does not satisfy vect_supportable_dr_alignment 1028 then see if loop versioning checks can be used to make all data 1029 references satisfy vect_supportable_dr_alignment. If so, update 1030 data structures as needed and return true. 1031 1032 C) If neither peeling nor versioning were successful then return false if 1033 any data reference does not satisfy vect_supportable_dr_alignment. 1034 1035 D) Return true (all data references satisfy vect_supportable_dr_alignment). 1036 1037 Note, Possibility 3 above (which is peeling and versioning together) is not 1038 being done at this time. */ 1039 1040 /* (1) Peeling to force alignment. */ 1041 1042 /* (1.1) Decide whether to perform peeling, and how many iterations to peel: 1043 Considerations: 1044 + How many accesses will become aligned due to the peeling 1045 - How many accesses will become unaligned due to the peeling, 1046 and the cost of misaligned accesses. 1047 - The cost of peeling (the extra runtime checks, the increase 1048 in code size). 1049 1050 The scheme we use FORNOW: peel to force the alignment of the first 1051 misaligned store in the loop. 1052 Rationale: misaligned stores are not yet supported. 1053 1054 TODO: Use a cost model. */ 1055 1056 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1057 if (!DR_IS_READ (dr) && !aligned_access_p (dr)) 1058 {
| 966/* Function vect_enhance_data_refs_alignment 967 968 This pass will use loop versioning and loop peeling in order to enhance 969 the alignment of data references in the loop. 970 971 FOR NOW: we assume that whatever versioning/peeling takes place, only the 972 original loop is to be vectorized; Any other loops that are created by 973 the transformations performed in this pass - are not supposed to be 974 vectorized. This restriction will be relaxed. 975 976 This pass will require a cost model to guide it whether to apply peeling 977 or versioning or a combination of the two. For example, the scheme that 978 intel uses when given a loop with several memory accesses, is as follows: 979 choose one memory access ('p') which alignment you want to force by doing 980 peeling. Then, either (1) generate a loop in which 'p' is aligned and all 981 other accesses are not necessarily aligned, or (2) use loop versioning to 982 generate one loop in which all accesses are aligned, and another loop in 983 which only 'p' is necessarily aligned. 984 985 ("Automatic Intra-Register Vectorization for the Intel Architecture", 986 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International 987 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) 988 989 Devising a cost model is the most critical aspect of this work. It will 990 guide us on which access to peel for, whether to use loop versioning, how 991 many versions to create, etc. The cost model will probably consist of 992 generic considerations as well as target specific considerations (on 993 powerpc for example, misaligned stores are more painful than misaligned 994 loads). 995 996 Here are the general steps involved in alignment enhancements: 997 998 -- original loop, before alignment analysis: 999 for (i=0; i<N; i++){ 1000 x = q[i]; # DR_MISALIGNMENT(q) = unknown 1001 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1002 } 1003 1004 -- After vect_compute_data_refs_alignment: 1005 for (i=0; i<N; i++){ 1006 x = q[i]; # DR_MISALIGNMENT(q) = 3 1007 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1008 } 1009 1010 -- Possibility 1: we do loop versioning: 1011 if (p is aligned) { 1012 for (i=0; i<N; i++){ # loop 1A 1013 x = q[i]; # DR_MISALIGNMENT(q) = 3 1014 p[i] = y; # DR_MISALIGNMENT(p) = 0 1015 } 1016 } 1017 else { 1018 for (i=0; i<N; i++){ # loop 1B 1019 x = q[i]; # DR_MISALIGNMENT(q) = 3 1020 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 1021 } 1022 } 1023 1024 -- Possibility 2: we do loop peeling: 1025 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 1026 x = q[i]; 1027 p[i] = y; 1028 } 1029 for (i = 3; i < N; i++){ # loop 2A 1030 x = q[i]; # DR_MISALIGNMENT(q) = 0 1031 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1032 } 1033 1034 -- Possibility 3: combination of loop peeling and versioning: 1035 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 1036 x = q[i]; 1037 p[i] = y; 1038 } 1039 if (p is aligned) { 1040 for (i = 3; i<N; i++){ # loop 3A 1041 x = q[i]; # DR_MISALIGNMENT(q) = 0 1042 p[i] = y; # DR_MISALIGNMENT(p) = 0 1043 } 1044 } 1045 else { 1046 for (i = 3; i<N; i++){ # loop 3B 1047 x = q[i]; # DR_MISALIGNMENT(q) = 0 1048 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 1049 } 1050 } 1051 1052 These loops are later passed to loop_transform to be vectorized. The 1053 vectorizer will use the alignment information to guide the transformation 1054 (whether to generate regular loads/stores, or with special handling for 1055 misalignment). */ 1056 1057static bool 1058vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 1059{ 1060 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1061 enum dr_alignment_support supportable_dr_alignment; 1062 struct data_reference *dr0 = NULL; 1063 struct data_reference *dr; 1064 unsigned int i; 1065 bool do_peeling = false; 1066 bool do_versioning = false; 1067 bool stat; 1068 1069 /* While cost model enhancements are expected in the future, the high level 1070 view of the code at this time is as follows: 1071 1072 A) If there is a misaligned write then see if peeling to align this write 1073 can make all data references satisfy vect_supportable_dr_alignment. 1074 If so, update data structures as needed and return true. Note that 1075 at this time vect_supportable_dr_alignment is known to return false 1076 for a a misaligned write. 1077 1078 B) If peeling wasn't possible and there is a data reference with an 1079 unknown misalignment that does not satisfy vect_supportable_dr_alignment 1080 then see if loop versioning checks can be used to make all data 1081 references satisfy vect_supportable_dr_alignment. If so, update 1082 data structures as needed and return true. 1083 1084 C) If neither peeling nor versioning were successful then return false if 1085 any data reference does not satisfy vect_supportable_dr_alignment. 1086 1087 D) Return true (all data references satisfy vect_supportable_dr_alignment). 1088 1089 Note, Possibility 3 above (which is peeling and versioning together) is not 1090 being done at this time. */ 1091 1092 /* (1) Peeling to force alignment. */ 1093 1094 /* (1.1) Decide whether to perform peeling, and how many iterations to peel: 1095 Considerations: 1096 + How many accesses will become aligned due to the peeling 1097 - How many accesses will become unaligned due to the peeling, 1098 and the cost of misaligned accesses. 1099 - The cost of peeling (the extra runtime checks, the increase 1100 in code size). 1101 1102 The scheme we use FORNOW: peel to force the alignment of the first 1103 misaligned store in the loop. 1104 Rationale: misaligned stores are not yet supported. 1105 1106 TODO: Use a cost model. */ 1107 1108 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1109 if (!DR_IS_READ (dr) && !aligned_access_p (dr)) 1110 {
|
1059 dr0 = dr; 1060 do_peeling = true;
| 1111 do_peeling = vector_alignment_reachable_p (dr); 1112 if (do_peeling) 1113 dr0 = dr; 1114 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS)) 1115 fprintf (vect_dump, "vector alignment may not be reachable");
|
1061 break; 1062 } 1063 1064 /* Often peeling for alignment will require peeling for loop-bound, which in 1065 turn requires that we know how to adjust the loop ivs after the loop. */ 1066 if (!vect_can_advance_ivs_p (loop_vinfo)) 1067 do_peeling = false; 1068 1069 if (do_peeling) 1070 { 1071 int mis; 1072 int npeel = 0; 1073 1074 if (known_alignment_for_access_p (dr0)) 1075 { 1076 /* Since it's known at compile time, compute the number of iterations 1077 in the peeled loop (the peeling factor) for use in updating 1078 DR_MISALIGNMENT values. The peeling factor is the vectorization 1079 factor minus the misalignment as an element count. */ 1080 mis = DR_MISALIGNMENT (dr0); 1081 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); 1082 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis; 1083 } 1084 1085 /* Ensure that all data refs can be vectorized after the peel. */ 1086 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1087 { 1088 int save_misalignment; 1089 1090 if (dr == dr0) 1091 continue; 1092 1093 save_misalignment = DR_MISALIGNMENT (dr); 1094 vect_update_misalignment_for_peel (dr, dr0, npeel); 1095 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 1096 DR_MISALIGNMENT (dr) = save_misalignment; 1097 1098 if (!supportable_dr_alignment) 1099 { 1100 do_peeling = false; 1101 break; 1102 } 1103 } 1104 1105 if (do_peeling) 1106 { 1107 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. 1108 If the misalignment of DR_i is identical to that of dr0 then set 1109 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and 1110 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i) 1111 by the peeling factor times the element size of DR_i (MOD the 1112 vectorization factor times the size). Otherwise, the 1113 misalignment of DR_i must be set to unknown. */ 1114 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1115 if (dr != dr0) 1116 vect_update_misalignment_for_peel (dr, dr0, npeel); 1117 1118 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; 1119 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); 1120 DR_MISALIGNMENT (dr0) = 0; 1121 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1122 fprintf (vect_dump, "Alignment of access forced using peeling."); 1123 1124 if (vect_print_dump_info (REPORT_DETAILS)) 1125 fprintf (vect_dump, "Peeling for alignment will be applied."); 1126 1127 stat = vect_verify_datarefs_alignment (loop_vinfo); 1128 gcc_assert (stat); 1129 return stat; 1130 } 1131 } 1132 1133 1134 /* (2) Versioning to force alignment. */ 1135 1136 /* Try versioning if: 1137 1) flag_tree_vect_loop_version is TRUE 1138 2) optimize_size is FALSE 1139 3) there is at least one unsupported misaligned data ref with an unknown 1140 misalignment, and 1141 4) all misaligned data refs with a known misalignment are supported, and 1142 5) the number of runtime alignment checks is within reason. */ 1143 1144 do_versioning = flag_tree_vect_loop_version && (!optimize_size); 1145 1146 if (do_versioning) 1147 { 1148 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1149 { 1150 if (aligned_access_p (dr)) 1151 continue; 1152 1153 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 1154 1155 if (!supportable_dr_alignment) 1156 { 1157 tree stmt; 1158 int mask; 1159 tree vectype; 1160 1161 if (known_alignment_for_access_p (dr) 1162 || VEC_length (tree, 1163 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) 1164 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS)) 1165 { 1166 do_versioning = false; 1167 break; 1168 } 1169 1170 stmt = DR_STMT (dr); 1171 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 1172 gcc_assert (vectype); 1173 1174 /* The rightmost bits of an aligned address must be zeros. 1175 Construct the mask needed for this test. For example, 1176 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the 1177 mask must be 15 = 0xf. */ 1178 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1; 1179 1180 /* FORNOW: use the same mask to test all potentially unaligned 1181 references in the loop. The vectorizer currently supports 1182 a single vector size, see the reference to 1183 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the 1184 vectorization factor is computed. */ 1185 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo) 1186 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask); 1187 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; 1188 VEC_safe_push (tree, heap, 1189 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 1190 DR_STMT (dr)); 1191 } 1192 } 1193 1194 /* Versioning requires at least one misaligned data reference. */ 1195 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0) 1196 do_versioning = false; 1197 else if (!do_versioning) 1198 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0); 1199 } 1200 1201 if (do_versioning) 1202 { 1203 VEC(tree,heap) *may_misalign_stmts 1204 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); 1205 tree stmt; 1206 1207 /* It can now be assumed that the data references in the statements 1208 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version 1209 of the loop being vectorized. */ 1210 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++) 1211 { 1212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1213 dr = STMT_VINFO_DATA_REF (stmt_info); 1214 DR_MISALIGNMENT (dr) = 0; 1215 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1216 fprintf (vect_dump, "Alignment of access forced using versioning."); 1217 } 1218 1219 if (vect_print_dump_info (REPORT_DETAILS)) 1220 fprintf (vect_dump, "Versioning for alignment will be applied."); 1221 1222 /* Peeling and versioning can't be done together at this time. */ 1223 gcc_assert (! (do_peeling && do_versioning)); 1224 1225 stat = vect_verify_datarefs_alignment (loop_vinfo); 1226 gcc_assert (stat); 1227 return stat; 1228 } 1229 1230 /* This point is reached if neither peeling nor versioning is being done. */ 1231 gcc_assert (! (do_peeling || do_versioning)); 1232 1233 stat = vect_verify_datarefs_alignment (loop_vinfo); 1234 return stat; 1235} 1236 1237 1238/* Function vect_analyze_data_refs_alignment 1239 1240 Analyze the alignment of the data-references in the loop. 1241 Return FALSE if a data reference is found that cannot be vectorized. */ 1242 1243static bool 1244vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) 1245{ 1246 if (vect_print_dump_info (REPORT_DETAILS)) 1247 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ==="); 1248 1249 if (!vect_compute_data_refs_alignment (loop_vinfo)) 1250 { 1251 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1252 fprintf (vect_dump, 1253 "not vectorized: can't calculate alignment for data ref."); 1254 return false; 1255 } 1256 1257 return true; 1258} 1259 1260 1261/* Function vect_analyze_data_ref_access. 1262 1263 Analyze the access pattern of the data-reference DR. For now, a data access 1264 has to be consecutive to be considered vectorizable. */ 1265 1266static bool 1267vect_analyze_data_ref_access (struct data_reference *dr) 1268{ 1269 tree step = DR_STEP (dr); 1270 tree scalar_type = TREE_TYPE (DR_REF (dr)); 1271 1272 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) 1273 { 1274 if (vect_print_dump_info (REPORT_DETAILS)) 1275 fprintf (vect_dump, "not consecutive access"); 1276 return false; 1277 } 1278 return true; 1279} 1280 1281 1282/* Function vect_analyze_data_ref_accesses. 1283 1284 Analyze the access pattern of all the data references in the loop. 1285 1286 FORNOW: the only access pattern that is considered vectorizable is a 1287 simple step 1 (consecutive) access. 1288 1289 FORNOW: handle only arrays and pointer accesses. */ 1290 1291static bool 1292vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) 1293{ 1294 unsigned int i; 1295 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1296 struct data_reference *dr; 1297 1298 if (vect_print_dump_info (REPORT_DETAILS)) 1299 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ==="); 1300 1301 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1302 if (!vect_analyze_data_ref_access (dr)) 1303 { 1304 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1305 fprintf (vect_dump, "not vectorized: complicated access pattern."); 1306 return false; 1307 } 1308 1309 return true; 1310} 1311 1312 1313/* Function vect_analyze_data_refs. 1314 1315 Find all the data references in the loop. 1316 1317 The general structure of the analysis of data refs in the vectorizer is as 1318 follows: 1319 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to 1320 find and analyze all data-refs in the loop and their dependences. 1321 2- vect_analyze_dependences(): apply dependence testing using ddrs. 1322 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. 1323 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. 1324 1325*/ 1326 1327static bool 1328vect_analyze_data_refs (loop_vec_info loop_vinfo) 1329{ 1330 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1331 unsigned int i; 1332 VEC (data_reference_p, heap) *datarefs; 1333 struct data_reference *dr; 1334 tree scalar_type; 1335 1336 if (vect_print_dump_info (REPORT_DETAILS)) 1337 fprintf (vect_dump, "=== vect_analyze_data_refs ==="); 1338 1339 compute_data_dependences_for_loop (loop, false, 1340 &LOOP_VINFO_DATAREFS (loop_vinfo), 1341 &LOOP_VINFO_DDRS (loop_vinfo)); 1342 1343 /* Go through the data-refs, check that the analysis succeeded. Update pointer 1344 from stmt_vec_info struct to DR and vectype. */ 1345 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1346 1347 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1348 { 1349 tree stmt; 1350 stmt_vec_info stmt_info; 1351 1352 if (!dr || !DR_REF (dr)) 1353 { 1354 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1355 fprintf (vect_dump, "not vectorized: unhandled data-ref "); 1356 return false; 1357 } 1358 1359 /* Update DR field in stmt_vec_info struct. */ 1360 stmt = DR_STMT (dr); 1361 stmt_info = vinfo_for_stmt (stmt); 1362 1363 if (STMT_VINFO_DATA_REF (stmt_info)) 1364 { 1365 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1366 { 1367 fprintf (vect_dump, 1368 "not vectorized: more than one data ref in stmt: "); 1369 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1370 } 1371 return false; 1372 } 1373 STMT_VINFO_DATA_REF (stmt_info) = dr; 1374 1375 /* Check that analysis of the data-ref succeeded. */ 1376 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) 1377 || !DR_STEP (dr)) 1378 { 1379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1380 { 1381 fprintf (vect_dump, "not vectorized: data ref analysis failed "); 1382 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1383 } 1384 return false; 1385 } 1386 if (!DR_MEMTAG (dr)) 1387 { 1388 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1389 { 1390 fprintf (vect_dump, "not vectorized: no memory tag for "); 1391 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 1392 } 1393 return false; 1394 } 1395 1396 /* Set vectype for STMT. */ 1397 scalar_type = TREE_TYPE (DR_REF (dr)); 1398 STMT_VINFO_VECTYPE (stmt_info) = 1399 get_vectype_for_scalar_type (scalar_type); 1400 if (!STMT_VINFO_VECTYPE (stmt_info)) 1401 { 1402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1403 { 1404 fprintf (vect_dump, 1405 "not vectorized: no vectype for stmt: "); 1406 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1407 fprintf (vect_dump, " scalar_type: "); 1408 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS); 1409 } 1410 return false; 1411 } 1412 } 1413 1414 return true; 1415} 1416 1417 1418/* Utility functions used by vect_mark_stmts_to_be_vectorized. */ 1419 1420/* Function vect_mark_relevant. 1421 1422 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ 1423 1424static void 1425vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt, 1426 bool relevant_p, bool live_p) 1427{ 1428 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1429 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info); 1430 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); 1431 1432 if (vect_print_dump_info (REPORT_DETAILS)) 1433 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p); 1434 1435 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) 1436 { 1437 tree pattern_stmt; 1438 1439 /* This is the last stmt in a sequence that was detected as a 1440 pattern that can potentially be vectorized. Don't mark the stmt 1441 as relevant/live because it's not going to vectorized. 1442 Instead mark the pattern-stmt that replaces it. */ 1443 if (vect_print_dump_info (REPORT_DETAILS)) 1444 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); 1445 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 1446 stmt_info = vinfo_for_stmt (pattern_stmt); 1447 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); 1448 save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info); 1449 save_live_p = STMT_VINFO_LIVE_P (stmt_info); 1450 stmt = pattern_stmt; 1451 } 1452 1453 STMT_VINFO_LIVE_P (stmt_info) |= live_p; 1454 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p; 1455 1456 if (TREE_CODE (stmt) == PHI_NODE) 1457 /* Don't put phi-nodes in the worklist. Phis that are marked relevant 1458 or live will fail vectorization later on. */ 1459 return; 1460 1461 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p 1462 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p) 1463 { 1464 if (vect_print_dump_info (REPORT_DETAILS)) 1465 fprintf (vect_dump, "already marked relevant/live."); 1466 return; 1467 } 1468 1469 VEC_safe_push (tree, heap, *worklist, stmt); 1470} 1471 1472 1473/* Function vect_stmt_relevant_p. 1474 1475 Return true if STMT in loop that is represented by LOOP_VINFO is 1476 "relevant for vectorization". 1477 1478 A stmt is considered "relevant for vectorization" if: 1479 - it has uses outside the loop. 1480 - it has vdefs (it alters memory). 1481 - control stmts in the loop (except for the exit condition). 1482 1483 CHECKME: what other side effects would the vectorizer allow? */ 1484 1485static bool 1486vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo, 1487 bool *relevant_p, bool *live_p) 1488{ 1489 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1490 ssa_op_iter op_iter; 1491 imm_use_iterator imm_iter; 1492 use_operand_p use_p; 1493 def_operand_p def_p; 1494 1495 *relevant_p = false; 1496 *live_p = false; 1497 1498 /* cond stmt other than loop exit cond. */ 1499 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) 1500 *relevant_p = true; 1501 1502 /* changing memory. */ 1503 if (TREE_CODE (stmt) != PHI_NODE) 1504 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)) 1505 { 1506 if (vect_print_dump_info (REPORT_DETAILS)) 1507 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs."); 1508 *relevant_p = true; 1509 } 1510 1511 /* uses outside the loop. */ 1512 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF) 1513 { 1514 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p)) 1515 { 1516 basic_block bb = bb_for_stmt (USE_STMT (use_p)); 1517 if (!flow_bb_inside_loop_p (loop, bb)) 1518 { 1519 if (vect_print_dump_info (REPORT_DETAILS)) 1520 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop."); 1521 1522 /* We expect all such uses to be in the loop exit phis 1523 (because of loop closed form) */ 1524 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE); 1525 gcc_assert (bb == loop->single_exit->dest); 1526 1527 *live_p = true; 1528 } 1529 } 1530 } 1531 1532 return (*live_p || *relevant_p); 1533} 1534 1535 1536/* Function vect_mark_stmts_to_be_vectorized. 1537 1538 Not all stmts in the loop need to be vectorized. For example: 1539 1540 for i... 1541 for j... 1542 1. T0 = i + j 1543 2. T1 = a[T0] 1544 1545 3. j = j + 1 1546 1547 Stmt 1 and 3 do not need to be vectorized, because loop control and 1548 addressing of vectorized data-refs are handled differently. 1549 1550 This pass detects such stmts. */ 1551 1552static bool 1553vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) 1554{ 1555 VEC(tree,heap) *worklist; 1556 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1557 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 1558 unsigned int nbbs = loop->num_nodes; 1559 block_stmt_iterator si; 1560 tree stmt, use; 1561 stmt_ann_t ann; 1562 ssa_op_iter iter; 1563 unsigned int i; 1564 stmt_vec_info stmt_vinfo; 1565 basic_block bb; 1566 tree phi; 1567 bool relevant_p, live_p; 1568 tree def, def_stmt; 1569 enum vect_def_type dt; 1570 1571 if (vect_print_dump_info (REPORT_DETAILS)) 1572 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ==="); 1573 1574 worklist = VEC_alloc (tree, heap, 64); 1575 1576 /* 1. Init worklist. */ 1577 1578 bb = loop->header; 1579 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1580 { 1581 if (vect_print_dump_info (REPORT_DETAILS)) 1582 { 1583 fprintf (vect_dump, "init: phi relevant? "); 1584 print_generic_expr (vect_dump, phi, TDF_SLIM); 1585 } 1586 1587 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p)) 1588 vect_mark_relevant (&worklist, phi, relevant_p, live_p); 1589 } 1590 1591 for (i = 0; i < nbbs; i++) 1592 { 1593 bb = bbs[i]; 1594 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 1595 { 1596 stmt = bsi_stmt (si); 1597 1598 if (vect_print_dump_info (REPORT_DETAILS)) 1599 { 1600 fprintf (vect_dump, "init: stmt relevant? "); 1601 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1602 } 1603 1604 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p)) 1605 vect_mark_relevant (&worklist, stmt, relevant_p, live_p); 1606 } 1607 } 1608 1609 1610 /* 2. Process_worklist */ 1611 1612 while (VEC_length (tree, worklist) > 0) 1613 { 1614 stmt = VEC_pop (tree, worklist); 1615 1616 if (vect_print_dump_info (REPORT_DETAILS)) 1617 { 1618 fprintf (vect_dump, "worklist: examine stmt: "); 1619 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1620 } 1621 1622 /* Examine the USEs of STMT. For each ssa-name USE thta is defined 1623 in the loop, mark the stmt that defines it (DEF_STMT) as 1624 relevant/irrelevant and live/dead according to the liveness and 1625 relevance properties of STMT. 1626 */ 1627 1628 gcc_assert (TREE_CODE (stmt) != PHI_NODE); 1629 1630 ann = stmt_ann (stmt); 1631 stmt_vinfo = vinfo_for_stmt (stmt); 1632 1633 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo); 1634 live_p = STMT_VINFO_LIVE_P (stmt_vinfo); 1635 1636 /* Generally, the liveness and relevance properties of STMT are 1637 propagated to the DEF_STMTs of its USEs: 1638 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p 1639 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p 1640 1641 Exceptions: 1642 1643 (case 1) 1644 If USE is used only for address computations (e.g. array indexing), 1645 which does not need to be directly vectorized, then the 1646 liveness/relevance of the respective DEF_STMT is left unchanged. 1647 1648 (case 2) 1649 If STMT has been identified as defining a reduction variable, then 1650 we have two cases: 1651 (case 2.1) 1652 The last use of STMT is the reduction-variable, which is defined 1653 by a loop-header-phi. We don't want to mark the phi as live or 1654 relevant (because it does not need to be vectorized, it is handled 1655 as part of the vectorization of the reduction), so in this case we 1656 skip the call to vect_mark_relevant. 1657 (case 2.2) 1658 The rest of the uses of STMT are defined in the loop body. For 1659 the def_stmt of these uses we want to set liveness/relevance 1660 as follows: 1661 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false 1662 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true 1663 because even though STMT is classified as live (since it defines a 1664 value that is used across loop iterations) and irrelevant (since it 1665 is not used inside the loop), it will be vectorized, and therefore 1666 the corresponding DEF_STMTs need to marked as relevant. 1667 */ 1668 1669 /* case 2.2: */ 1670 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) 1671 { 1672 gcc_assert (!relevant_p && live_p); 1673 relevant_p = true; 1674 live_p = false; 1675 } 1676 1677 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) 1678 { 1679 /* case 1: we are only interested in uses that need to be vectorized. 1680 Uses that are used for address computation are not considered 1681 relevant. 1682 */ 1683 if (!exist_non_indexing_operands_for_use_p (use, stmt)) 1684 continue; 1685 1686 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt)) 1687 { 1688 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1689 fprintf (vect_dump, "not vectorized: unsupported use in stmt."); 1690 VEC_free (tree, heap, worklist); 1691 return false; 1692 } 1693 1694 if (!def_stmt || IS_EMPTY_STMT (def_stmt)) 1695 continue; 1696 1697 if (vect_print_dump_info (REPORT_DETAILS)) 1698 { 1699 fprintf (vect_dump, "worklist: examine use %d: ", i); 1700 print_generic_expr (vect_dump, use, TDF_SLIM); 1701 } 1702 1703 bb = bb_for_stmt (def_stmt); 1704 if (!flow_bb_inside_loop_p (loop, bb)) 1705 continue; 1706 1707 /* case 2.1: the reduction-use does not mark the defining-phi 1708 as relevant. */ 1709 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def 1710 && TREE_CODE (def_stmt) == PHI_NODE) 1711 continue; 1712 1713 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); 1714 } 1715 } /* while worklist */ 1716 1717 VEC_free (tree, heap, worklist); 1718 return true; 1719} 1720 1721 1722/* Function vect_can_advance_ivs_p 1723 1724 In case the number of iterations that LOOP iterates is unknown at compile 1725 time, an epilog loop will be generated, and the loop induction variables 1726 (IVs) will be "advanced" to the value they are supposed to take just before 1727 the epilog loop. Here we check that the access function of the loop IVs 1728 and the expression that represents the loop bound are simple enough. 1729 These restrictions will be relaxed in the future. */ 1730 1731static bool 1732vect_can_advance_ivs_p (loop_vec_info loop_vinfo) 1733{ 1734 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1735 basic_block bb = loop->header; 1736 tree phi; 1737 1738 /* Analyze phi functions of the loop header. */ 1739 1740 if (vect_print_dump_info (REPORT_DETAILS)) 1741 fprintf (vect_dump, "=== vect_can_advance_ivs_p ==="); 1742 1743 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1744 { 1745 tree access_fn = NULL; 1746 tree evolution_part; 1747 1748 if (vect_print_dump_info (REPORT_DETAILS)) 1749 { 1750 fprintf (vect_dump, "Analyze phi: "); 1751 print_generic_expr (vect_dump, phi, TDF_SLIM); 1752 } 1753 1754 /* Skip virtual phi's. The data dependences that are associated with 1755 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ 1756 1757 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) 1758 { 1759 if (vect_print_dump_info (REPORT_DETAILS)) 1760 fprintf (vect_dump, "virtual phi. skip."); 1761 continue; 1762 } 1763 1764 /* Skip reduction phis. */ 1765 1766 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) 1767 { 1768 if (vect_print_dump_info (REPORT_DETAILS)) 1769 fprintf (vect_dump, "reduc phi. skip."); 1770 continue; 1771 } 1772 1773 /* Analyze the evolution function. */ 1774 1775 access_fn = instantiate_parameters 1776 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); 1777 1778 if (!access_fn) 1779 { 1780 if (vect_print_dump_info (REPORT_DETAILS)) 1781 fprintf (vect_dump, "No Access function."); 1782 return false; 1783 } 1784 1785 if (vect_print_dump_info (REPORT_DETAILS)) 1786 { 1787 fprintf (vect_dump, "Access function of PHI: "); 1788 print_generic_expr (vect_dump, access_fn, TDF_SLIM); 1789 } 1790 1791 evolution_part = evolution_part_in_loop_num (access_fn, loop->num); 1792 1793 if (evolution_part == NULL_TREE) 1794 { 1795 if (vect_print_dump_info (REPORT_DETAILS)) 1796 fprintf (vect_dump, "No evolution."); 1797 return false; 1798 } 1799 1800 /* FORNOW: We do not transform initial conditions of IVs 1801 which evolution functions are a polynomial of degree >= 2. */ 1802 1803 if (tree_is_chrec (evolution_part)) 1804 return false; 1805 } 1806 1807 return true; 1808} 1809 1810 1811/* Function vect_get_loop_niters. 1812 1813 Determine how many iterations the loop is executed. 1814 If an expression that represents the number of iterations 1815 can be constructed, place it in NUMBER_OF_ITERATIONS. 1816 Return the loop exit condition. */ 1817 1818static tree 1819vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) 1820{ 1821 tree niters; 1822 1823 if (vect_print_dump_info (REPORT_DETAILS)) 1824 fprintf (vect_dump, "=== get_loop_niters ==="); 1825 1826 niters = number_of_iterations_in_loop (loop); 1827 1828 if (niters != NULL_TREE 1829 && niters != chrec_dont_know) 1830 { 1831 *number_of_iterations = niters; 1832 1833 if (vect_print_dump_info (REPORT_DETAILS)) 1834 { 1835 fprintf (vect_dump, "==> get_loop_niters:" ); 1836 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM); 1837 } 1838 } 1839 1840 return get_loop_exit_condition (loop); 1841} 1842 1843 1844/* Function vect_analyze_loop_form. 1845 1846 Verify the following restrictions (some may be relaxed in the future): 1847 - it's an inner-most loop 1848 - number of BBs = 2 (which are the loop header and the latch) 1849 - the loop has a pre-header 1850 - the loop has a single entry and exit 1851 - the loop exit condition is simple enough, and the number of iterations 1852 can be analyzed (a countable loop). */ 1853 1854static loop_vec_info 1855vect_analyze_loop_form (struct loop *loop) 1856{ 1857 loop_vec_info loop_vinfo; 1858 tree loop_cond; 1859 tree number_of_iterations = NULL; 1860 1861 if (vect_print_dump_info (REPORT_DETAILS)) 1862 fprintf (vect_dump, "=== vect_analyze_loop_form ==="); 1863 1864 if (loop->inner) 1865 { 1866 if (vect_print_dump_info (REPORT_OUTER_LOOPS)) 1867 fprintf (vect_dump, "not vectorized: nested loop."); 1868 return NULL; 1869 } 1870 1871 if (!loop->single_exit 1872 || loop->num_nodes != 2 1873 || EDGE_COUNT (loop->header->preds) != 2) 1874 { 1875 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1876 { 1877 if (!loop->single_exit) 1878 fprintf (vect_dump, "not vectorized: multiple exits."); 1879 else if (loop->num_nodes != 2) 1880 fprintf (vect_dump, "not vectorized: too many BBs in loop."); 1881 else if (EDGE_COUNT (loop->header->preds) != 2) 1882 fprintf (vect_dump, "not vectorized: too many incoming edges."); 1883 } 1884 1885 return NULL; 1886 } 1887 1888 /* We assume that the loop exit condition is at the end of the loop. i.e, 1889 that the loop is represented as a do-while (with a proper if-guard 1890 before the loop if needed), where the loop header contains all the 1891 executable statements, and the latch is empty. */ 1892 if (!empty_block_p (loop->latch) 1893 || phi_nodes (loop->latch)) 1894 { 1895 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1896 fprintf (vect_dump, "not vectorized: unexpected loop form."); 1897 return NULL; 1898 } 1899 1900 /* Make sure there exists a single-predecessor exit bb: */ 1901 if (!single_pred_p (loop->single_exit->dest)) 1902 { 1903 edge e = loop->single_exit; 1904 if (!(e->flags & EDGE_ABNORMAL)) 1905 { 1906 split_loop_exit_edge (e); 1907 if (vect_print_dump_info (REPORT_DETAILS)) 1908 fprintf (vect_dump, "split exit edge."); 1909 } 1910 else 1911 { 1912 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1913 fprintf (vect_dump, "not vectorized: abnormal loop exit edge."); 1914 return NULL; 1915 } 1916 } 1917 1918 if (empty_block_p (loop->header)) 1919 { 1920 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1921 fprintf (vect_dump, "not vectorized: empty loop."); 1922 return NULL; 1923 } 1924 1925 loop_cond = vect_get_loop_niters (loop, &number_of_iterations); 1926 if (!loop_cond) 1927 { 1928 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1929 fprintf (vect_dump, "not vectorized: complicated exit condition."); 1930 return NULL; 1931 } 1932 1933 if (!number_of_iterations) 1934 { 1935 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1936 fprintf (vect_dump, 1937 "not vectorized: number of iterations cannot be computed."); 1938 return NULL; 1939 } 1940 1941 if (chrec_contains_undetermined (number_of_iterations)) 1942 { 1943 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1944 fprintf (vect_dump, "Infinite number of iterations."); 1945 return false; 1946 } 1947 1948 loop_vinfo = new_loop_vec_info (loop); 1949 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; 1950 1951 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 1952 { 1953 if (vect_print_dump_info (REPORT_DETAILS)) 1954 { 1955 fprintf (vect_dump, "Symbolic number of iterations is "); 1956 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS); 1957 } 1958 } 1959 else 1960 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) 1961 { 1962 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1963 fprintf (vect_dump, "not vectorized: number of iterations = 0."); 1964 return NULL; 1965 } 1966 1967 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; 1968 1969 return loop_vinfo; 1970} 1971 1972 1973/* Function vect_analyze_loop. 1974 1975 Apply a set of analyses on LOOP, and create a loop_vec_info struct 1976 for it. The different analyses will record information in the 1977 loop_vec_info struct. */ 1978loop_vec_info 1979vect_analyze_loop (struct loop *loop) 1980{ 1981 bool ok; 1982 loop_vec_info loop_vinfo; 1983 1984 if (vect_print_dump_info (REPORT_DETAILS)) 1985 fprintf (vect_dump, "===== analyze_loop_nest ====="); 1986 1987 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ 1988 1989 loop_vinfo = vect_analyze_loop_form (loop); 1990 if (!loop_vinfo) 1991 { 1992 if (vect_print_dump_info (REPORT_DETAILS)) 1993 fprintf (vect_dump, "bad loop form."); 1994 return NULL; 1995 } 1996 1997 /* Find all data references in the loop (which correspond to vdefs/vuses) 1998 and analyze their evolution in the loop. 1999 2000 FORNOW: Handle only simple, array references, which 2001 alignment can be forced, and aligned pointer-references. */ 2002 2003 ok = vect_analyze_data_refs (loop_vinfo); 2004 if (!ok) 2005 { 2006 if (vect_print_dump_info (REPORT_DETAILS)) 2007 fprintf (vect_dump, "bad data references."); 2008 destroy_loop_vec_info (loop_vinfo); 2009 return NULL; 2010 } 2011 2012 /* Classify all cross-iteration scalar data-flow cycles. 2013 Cross-iteration cycles caused by virtual phis are analyzed separately. */ 2014 2015 vect_analyze_scalar_cycles (loop_vinfo); 2016 2017 vect_pattern_recog (loop_vinfo); 2018 2019 /* Data-flow analysis to detect stmts that do not need to be vectorized. */ 2020 2021 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); 2022 if (!ok) 2023 { 2024 if (vect_print_dump_info (REPORT_DETAILS)) 2025 fprintf (vect_dump, "unexpected pattern."); 2026 destroy_loop_vec_info (loop_vinfo); 2027 return NULL; 2028 } 2029 2030 /* Analyze the alignment of the data-refs in the loop. 2031 Fail if a data reference is found that cannot be vectorized. */ 2032 2033 ok = vect_analyze_data_refs_alignment (loop_vinfo); 2034 if (!ok) 2035 { 2036 if (vect_print_dump_info (REPORT_DETAILS)) 2037 fprintf (vect_dump, "bad data alignment."); 2038 destroy_loop_vec_info (loop_vinfo); 2039 return NULL; 2040 } 2041 2042 ok = vect_determine_vectorization_factor (loop_vinfo); 2043 if (!ok) 2044 { 2045 if (vect_print_dump_info (REPORT_DETAILS)) 2046 fprintf (vect_dump, "can't determine vectorization factor."); 2047 destroy_loop_vec_info (loop_vinfo); 2048 return NULL; 2049 } 2050 2051 /* Analyze data dependences between the data-refs in the loop. 2052 FORNOW: fail at the first data dependence that we encounter. */ 2053 2054 ok = vect_analyze_data_ref_dependences (loop_vinfo); 2055 if (!ok) 2056 { 2057 if (vect_print_dump_info (REPORT_DETAILS)) 2058 fprintf (vect_dump, "bad data dependence."); 2059 destroy_loop_vec_info (loop_vinfo); 2060 return NULL; 2061 } 2062 2063 /* Analyze the access patterns of the data-refs in the loop (consecutive, 2064 complex, etc.). FORNOW: Only handle consecutive access pattern. */ 2065 2066 ok = vect_analyze_data_ref_accesses (loop_vinfo); 2067 if (!ok) 2068 { 2069 if (vect_print_dump_info (REPORT_DETAILS)) 2070 fprintf (vect_dump, "bad data access."); 2071 destroy_loop_vec_info (loop_vinfo); 2072 return NULL; 2073 } 2074 2075 /* This pass will decide on using loop versioning and/or loop peeling in 2076 order to enhance the alignment of data references in the loop. */ 2077 2078 ok = vect_enhance_data_refs_alignment (loop_vinfo); 2079 if (!ok) 2080 { 2081 if (vect_print_dump_info (REPORT_DETAILS)) 2082 fprintf (vect_dump, "bad data alignment."); 2083 destroy_loop_vec_info (loop_vinfo); 2084 return NULL; 2085 } 2086 2087 /* Scan all the operations in the loop and make sure they are 2088 vectorizable. */ 2089 2090 ok = vect_analyze_operations (loop_vinfo); 2091 if (!ok) 2092 { 2093 if (vect_print_dump_info (REPORT_DETAILS)) 2094 fprintf (vect_dump, "bad operation or unsupported loop bound."); 2095 destroy_loop_vec_info (loop_vinfo); 2096 return NULL; 2097 } 2098 2099 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; 2100 2101 return loop_vinfo; 2102}
| 1116 break; 1117 } 1118 1119 /* Often peeling for alignment will require peeling for loop-bound, which in 1120 turn requires that we know how to adjust the loop ivs after the loop. */ 1121 if (!vect_can_advance_ivs_p (loop_vinfo)) 1122 do_peeling = false; 1123 1124 if (do_peeling) 1125 { 1126 int mis; 1127 int npeel = 0; 1128 1129 if (known_alignment_for_access_p (dr0)) 1130 { 1131 /* Since it's known at compile time, compute the number of iterations 1132 in the peeled loop (the peeling factor) for use in updating 1133 DR_MISALIGNMENT values. The peeling factor is the vectorization 1134 factor minus the misalignment as an element count. */ 1135 mis = DR_MISALIGNMENT (dr0); 1136 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); 1137 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis; 1138 } 1139 1140 /* Ensure that all data refs can be vectorized after the peel. */ 1141 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1142 { 1143 int save_misalignment; 1144 1145 if (dr == dr0) 1146 continue; 1147 1148 save_misalignment = DR_MISALIGNMENT (dr); 1149 vect_update_misalignment_for_peel (dr, dr0, npeel); 1150 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 1151 DR_MISALIGNMENT (dr) = save_misalignment; 1152 1153 if (!supportable_dr_alignment) 1154 { 1155 do_peeling = false; 1156 break; 1157 } 1158 } 1159 1160 if (do_peeling) 1161 { 1162 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. 1163 If the misalignment of DR_i is identical to that of dr0 then set 1164 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and 1165 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i) 1166 by the peeling factor times the element size of DR_i (MOD the 1167 vectorization factor times the size). Otherwise, the 1168 misalignment of DR_i must be set to unknown. */ 1169 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1170 if (dr != dr0) 1171 vect_update_misalignment_for_peel (dr, dr0, npeel); 1172 1173 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; 1174 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); 1175 DR_MISALIGNMENT (dr0) = 0; 1176 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1177 fprintf (vect_dump, "Alignment of access forced using peeling."); 1178 1179 if (vect_print_dump_info (REPORT_DETAILS)) 1180 fprintf (vect_dump, "Peeling for alignment will be applied."); 1181 1182 stat = vect_verify_datarefs_alignment (loop_vinfo); 1183 gcc_assert (stat); 1184 return stat; 1185 } 1186 } 1187 1188 1189 /* (2) Versioning to force alignment. */ 1190 1191 /* Try versioning if: 1192 1) flag_tree_vect_loop_version is TRUE 1193 2) optimize_size is FALSE 1194 3) there is at least one unsupported misaligned data ref with an unknown 1195 misalignment, and 1196 4) all misaligned data refs with a known misalignment are supported, and 1197 5) the number of runtime alignment checks is within reason. */ 1198 1199 do_versioning = flag_tree_vect_loop_version && (!optimize_size); 1200 1201 if (do_versioning) 1202 { 1203 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1204 { 1205 if (aligned_access_p (dr)) 1206 continue; 1207 1208 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 1209 1210 if (!supportable_dr_alignment) 1211 { 1212 tree stmt; 1213 int mask; 1214 tree vectype; 1215 1216 if (known_alignment_for_access_p (dr) 1217 || VEC_length (tree, 1218 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) 1219 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS)) 1220 { 1221 do_versioning = false; 1222 break; 1223 } 1224 1225 stmt = DR_STMT (dr); 1226 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 1227 gcc_assert (vectype); 1228 1229 /* The rightmost bits of an aligned address must be zeros. 1230 Construct the mask needed for this test. For example, 1231 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the 1232 mask must be 15 = 0xf. */ 1233 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1; 1234 1235 /* FORNOW: use the same mask to test all potentially unaligned 1236 references in the loop. The vectorizer currently supports 1237 a single vector size, see the reference to 1238 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the 1239 vectorization factor is computed. */ 1240 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo) 1241 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask); 1242 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; 1243 VEC_safe_push (tree, heap, 1244 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 1245 DR_STMT (dr)); 1246 } 1247 } 1248 1249 /* Versioning requires at least one misaligned data reference. */ 1250 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0) 1251 do_versioning = false; 1252 else if (!do_versioning) 1253 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0); 1254 } 1255 1256 if (do_versioning) 1257 { 1258 VEC(tree,heap) *may_misalign_stmts 1259 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); 1260 tree stmt; 1261 1262 /* It can now be assumed that the data references in the statements 1263 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version 1264 of the loop being vectorized. */ 1265 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++) 1266 { 1267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1268 dr = STMT_VINFO_DATA_REF (stmt_info); 1269 DR_MISALIGNMENT (dr) = 0; 1270 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1271 fprintf (vect_dump, "Alignment of access forced using versioning."); 1272 } 1273 1274 if (vect_print_dump_info (REPORT_DETAILS)) 1275 fprintf (vect_dump, "Versioning for alignment will be applied."); 1276 1277 /* Peeling and versioning can't be done together at this time. */ 1278 gcc_assert (! (do_peeling && do_versioning)); 1279 1280 stat = vect_verify_datarefs_alignment (loop_vinfo); 1281 gcc_assert (stat); 1282 return stat; 1283 } 1284 1285 /* This point is reached if neither peeling nor versioning is being done. */ 1286 gcc_assert (! (do_peeling || do_versioning)); 1287 1288 stat = vect_verify_datarefs_alignment (loop_vinfo); 1289 return stat; 1290} 1291 1292 1293/* Function vect_analyze_data_refs_alignment 1294 1295 Analyze the alignment of the data-references in the loop. 1296 Return FALSE if a data reference is found that cannot be vectorized. */ 1297 1298static bool 1299vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) 1300{ 1301 if (vect_print_dump_info (REPORT_DETAILS)) 1302 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ==="); 1303 1304 if (!vect_compute_data_refs_alignment (loop_vinfo)) 1305 { 1306 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1307 fprintf (vect_dump, 1308 "not vectorized: can't calculate alignment for data ref."); 1309 return false; 1310 } 1311 1312 return true; 1313} 1314 1315 1316/* Function vect_analyze_data_ref_access. 1317 1318 Analyze the access pattern of the data-reference DR. For now, a data access 1319 has to be consecutive to be considered vectorizable. */ 1320 1321static bool 1322vect_analyze_data_ref_access (struct data_reference *dr) 1323{ 1324 tree step = DR_STEP (dr); 1325 tree scalar_type = TREE_TYPE (DR_REF (dr)); 1326 1327 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) 1328 { 1329 if (vect_print_dump_info (REPORT_DETAILS)) 1330 fprintf (vect_dump, "not consecutive access"); 1331 return false; 1332 } 1333 return true; 1334} 1335 1336 1337/* Function vect_analyze_data_ref_accesses. 1338 1339 Analyze the access pattern of all the data references in the loop. 1340 1341 FORNOW: the only access pattern that is considered vectorizable is a 1342 simple step 1 (consecutive) access. 1343 1344 FORNOW: handle only arrays and pointer accesses. */ 1345 1346static bool 1347vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) 1348{ 1349 unsigned int i; 1350 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1351 struct data_reference *dr; 1352 1353 if (vect_print_dump_info (REPORT_DETAILS)) 1354 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ==="); 1355 1356 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1357 if (!vect_analyze_data_ref_access (dr)) 1358 { 1359 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1360 fprintf (vect_dump, "not vectorized: complicated access pattern."); 1361 return false; 1362 } 1363 1364 return true; 1365} 1366 1367 1368/* Function vect_analyze_data_refs. 1369 1370 Find all the data references in the loop. 1371 1372 The general structure of the analysis of data refs in the vectorizer is as 1373 follows: 1374 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to 1375 find and analyze all data-refs in the loop and their dependences. 1376 2- vect_analyze_dependences(): apply dependence testing using ddrs. 1377 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. 1378 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. 1379 1380*/ 1381 1382static bool 1383vect_analyze_data_refs (loop_vec_info loop_vinfo) 1384{ 1385 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1386 unsigned int i; 1387 VEC (data_reference_p, heap) *datarefs; 1388 struct data_reference *dr; 1389 tree scalar_type; 1390 1391 if (vect_print_dump_info (REPORT_DETAILS)) 1392 fprintf (vect_dump, "=== vect_analyze_data_refs ==="); 1393 1394 compute_data_dependences_for_loop (loop, false, 1395 &LOOP_VINFO_DATAREFS (loop_vinfo), 1396 &LOOP_VINFO_DDRS (loop_vinfo)); 1397 1398 /* Go through the data-refs, check that the analysis succeeded. Update pointer 1399 from stmt_vec_info struct to DR and vectype. */ 1400 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1401 1402 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1403 { 1404 tree stmt; 1405 stmt_vec_info stmt_info; 1406 1407 if (!dr || !DR_REF (dr)) 1408 { 1409 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1410 fprintf (vect_dump, "not vectorized: unhandled data-ref "); 1411 return false; 1412 } 1413 1414 /* Update DR field in stmt_vec_info struct. */ 1415 stmt = DR_STMT (dr); 1416 stmt_info = vinfo_for_stmt (stmt); 1417 1418 if (STMT_VINFO_DATA_REF (stmt_info)) 1419 { 1420 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1421 { 1422 fprintf (vect_dump, 1423 "not vectorized: more than one data ref in stmt: "); 1424 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1425 } 1426 return false; 1427 } 1428 STMT_VINFO_DATA_REF (stmt_info) = dr; 1429 1430 /* Check that analysis of the data-ref succeeded. */ 1431 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) 1432 || !DR_STEP (dr)) 1433 { 1434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1435 { 1436 fprintf (vect_dump, "not vectorized: data ref analysis failed "); 1437 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1438 } 1439 return false; 1440 } 1441 if (!DR_MEMTAG (dr)) 1442 { 1443 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1444 { 1445 fprintf (vect_dump, "not vectorized: no memory tag for "); 1446 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 1447 } 1448 return false; 1449 } 1450 1451 /* Set vectype for STMT. */ 1452 scalar_type = TREE_TYPE (DR_REF (dr)); 1453 STMT_VINFO_VECTYPE (stmt_info) = 1454 get_vectype_for_scalar_type (scalar_type); 1455 if (!STMT_VINFO_VECTYPE (stmt_info)) 1456 { 1457 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1458 { 1459 fprintf (vect_dump, 1460 "not vectorized: no vectype for stmt: "); 1461 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1462 fprintf (vect_dump, " scalar_type: "); 1463 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS); 1464 } 1465 return false; 1466 } 1467 } 1468 1469 return true; 1470} 1471 1472 1473/* Utility functions used by vect_mark_stmts_to_be_vectorized. */ 1474 1475/* Function vect_mark_relevant. 1476 1477 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ 1478 1479static void 1480vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt, 1481 bool relevant_p, bool live_p) 1482{ 1483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1484 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info); 1485 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); 1486 1487 if (vect_print_dump_info (REPORT_DETAILS)) 1488 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p); 1489 1490 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) 1491 { 1492 tree pattern_stmt; 1493 1494 /* This is the last stmt in a sequence that was detected as a 1495 pattern that can potentially be vectorized. Don't mark the stmt 1496 as relevant/live because it's not going to vectorized. 1497 Instead mark the pattern-stmt that replaces it. */ 1498 if (vect_print_dump_info (REPORT_DETAILS)) 1499 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); 1500 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 1501 stmt_info = vinfo_for_stmt (pattern_stmt); 1502 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); 1503 save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info); 1504 save_live_p = STMT_VINFO_LIVE_P (stmt_info); 1505 stmt = pattern_stmt; 1506 } 1507 1508 STMT_VINFO_LIVE_P (stmt_info) |= live_p; 1509 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p; 1510 1511 if (TREE_CODE (stmt) == PHI_NODE) 1512 /* Don't put phi-nodes in the worklist. Phis that are marked relevant 1513 or live will fail vectorization later on. */ 1514 return; 1515 1516 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p 1517 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p) 1518 { 1519 if (vect_print_dump_info (REPORT_DETAILS)) 1520 fprintf (vect_dump, "already marked relevant/live."); 1521 return; 1522 } 1523 1524 VEC_safe_push (tree, heap, *worklist, stmt); 1525} 1526 1527 1528/* Function vect_stmt_relevant_p. 1529 1530 Return true if STMT in loop that is represented by LOOP_VINFO is 1531 "relevant for vectorization". 1532 1533 A stmt is considered "relevant for vectorization" if: 1534 - it has uses outside the loop. 1535 - it has vdefs (it alters memory). 1536 - control stmts in the loop (except for the exit condition). 1537 1538 CHECKME: what other side effects would the vectorizer allow? */ 1539 1540static bool 1541vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo, 1542 bool *relevant_p, bool *live_p) 1543{ 1544 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1545 ssa_op_iter op_iter; 1546 imm_use_iterator imm_iter; 1547 use_operand_p use_p; 1548 def_operand_p def_p; 1549 1550 *relevant_p = false; 1551 *live_p = false; 1552 1553 /* cond stmt other than loop exit cond. */ 1554 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) 1555 *relevant_p = true; 1556 1557 /* changing memory. */ 1558 if (TREE_CODE (stmt) != PHI_NODE) 1559 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)) 1560 { 1561 if (vect_print_dump_info (REPORT_DETAILS)) 1562 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs."); 1563 *relevant_p = true; 1564 } 1565 1566 /* uses outside the loop. */ 1567 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF) 1568 { 1569 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p)) 1570 { 1571 basic_block bb = bb_for_stmt (USE_STMT (use_p)); 1572 if (!flow_bb_inside_loop_p (loop, bb)) 1573 { 1574 if (vect_print_dump_info (REPORT_DETAILS)) 1575 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop."); 1576 1577 /* We expect all such uses to be in the loop exit phis 1578 (because of loop closed form) */ 1579 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE); 1580 gcc_assert (bb == loop->single_exit->dest); 1581 1582 *live_p = true; 1583 } 1584 } 1585 } 1586 1587 return (*live_p || *relevant_p); 1588} 1589 1590 1591/* Function vect_mark_stmts_to_be_vectorized. 1592 1593 Not all stmts in the loop need to be vectorized. For example: 1594 1595 for i... 1596 for j... 1597 1. T0 = i + j 1598 2. T1 = a[T0] 1599 1600 3. j = j + 1 1601 1602 Stmt 1 and 3 do not need to be vectorized, because loop control and 1603 addressing of vectorized data-refs are handled differently. 1604 1605 This pass detects such stmts. */ 1606 1607static bool 1608vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) 1609{ 1610 VEC(tree,heap) *worklist; 1611 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1612 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 1613 unsigned int nbbs = loop->num_nodes; 1614 block_stmt_iterator si; 1615 tree stmt, use; 1616 stmt_ann_t ann; 1617 ssa_op_iter iter; 1618 unsigned int i; 1619 stmt_vec_info stmt_vinfo; 1620 basic_block bb; 1621 tree phi; 1622 bool relevant_p, live_p; 1623 tree def, def_stmt; 1624 enum vect_def_type dt; 1625 1626 if (vect_print_dump_info (REPORT_DETAILS)) 1627 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ==="); 1628 1629 worklist = VEC_alloc (tree, heap, 64); 1630 1631 /* 1. Init worklist. */ 1632 1633 bb = loop->header; 1634 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1635 { 1636 if (vect_print_dump_info (REPORT_DETAILS)) 1637 { 1638 fprintf (vect_dump, "init: phi relevant? "); 1639 print_generic_expr (vect_dump, phi, TDF_SLIM); 1640 } 1641 1642 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p)) 1643 vect_mark_relevant (&worklist, phi, relevant_p, live_p); 1644 } 1645 1646 for (i = 0; i < nbbs; i++) 1647 { 1648 bb = bbs[i]; 1649 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 1650 { 1651 stmt = bsi_stmt (si); 1652 1653 if (vect_print_dump_info (REPORT_DETAILS)) 1654 { 1655 fprintf (vect_dump, "init: stmt relevant? "); 1656 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1657 } 1658 1659 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p)) 1660 vect_mark_relevant (&worklist, stmt, relevant_p, live_p); 1661 } 1662 } 1663 1664 1665 /* 2. Process_worklist */ 1666 1667 while (VEC_length (tree, worklist) > 0) 1668 { 1669 stmt = VEC_pop (tree, worklist); 1670 1671 if (vect_print_dump_info (REPORT_DETAILS)) 1672 { 1673 fprintf (vect_dump, "worklist: examine stmt: "); 1674 print_generic_expr (vect_dump, stmt, TDF_SLIM); 1675 } 1676 1677 /* Examine the USEs of STMT. For each ssa-name USE thta is defined 1678 in the loop, mark the stmt that defines it (DEF_STMT) as 1679 relevant/irrelevant and live/dead according to the liveness and 1680 relevance properties of STMT. 1681 */ 1682 1683 gcc_assert (TREE_CODE (stmt) != PHI_NODE); 1684 1685 ann = stmt_ann (stmt); 1686 stmt_vinfo = vinfo_for_stmt (stmt); 1687 1688 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo); 1689 live_p = STMT_VINFO_LIVE_P (stmt_vinfo); 1690 1691 /* Generally, the liveness and relevance properties of STMT are 1692 propagated to the DEF_STMTs of its USEs: 1693 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p 1694 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p 1695 1696 Exceptions: 1697 1698 (case 1) 1699 If USE is used only for address computations (e.g. array indexing), 1700 which does not need to be directly vectorized, then the 1701 liveness/relevance of the respective DEF_STMT is left unchanged. 1702 1703 (case 2) 1704 If STMT has been identified as defining a reduction variable, then 1705 we have two cases: 1706 (case 2.1) 1707 The last use of STMT is the reduction-variable, which is defined 1708 by a loop-header-phi. We don't want to mark the phi as live or 1709 relevant (because it does not need to be vectorized, it is handled 1710 as part of the vectorization of the reduction), so in this case we 1711 skip the call to vect_mark_relevant. 1712 (case 2.2) 1713 The rest of the uses of STMT are defined in the loop body. For 1714 the def_stmt of these uses we want to set liveness/relevance 1715 as follows: 1716 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false 1717 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true 1718 because even though STMT is classified as live (since it defines a 1719 value that is used across loop iterations) and irrelevant (since it 1720 is not used inside the loop), it will be vectorized, and therefore 1721 the corresponding DEF_STMTs need to marked as relevant. 1722 */ 1723 1724 /* case 2.2: */ 1725 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) 1726 { 1727 gcc_assert (!relevant_p && live_p); 1728 relevant_p = true; 1729 live_p = false; 1730 } 1731 1732 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) 1733 { 1734 /* case 1: we are only interested in uses that need to be vectorized. 1735 Uses that are used for address computation are not considered 1736 relevant. 1737 */ 1738 if (!exist_non_indexing_operands_for_use_p (use, stmt)) 1739 continue; 1740 1741 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt)) 1742 { 1743 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 1744 fprintf (vect_dump, "not vectorized: unsupported use in stmt."); 1745 VEC_free (tree, heap, worklist); 1746 return false; 1747 } 1748 1749 if (!def_stmt || IS_EMPTY_STMT (def_stmt)) 1750 continue; 1751 1752 if (vect_print_dump_info (REPORT_DETAILS)) 1753 { 1754 fprintf (vect_dump, "worklist: examine use %d: ", i); 1755 print_generic_expr (vect_dump, use, TDF_SLIM); 1756 } 1757 1758 bb = bb_for_stmt (def_stmt); 1759 if (!flow_bb_inside_loop_p (loop, bb)) 1760 continue; 1761 1762 /* case 2.1: the reduction-use does not mark the defining-phi 1763 as relevant. */ 1764 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def 1765 && TREE_CODE (def_stmt) == PHI_NODE) 1766 continue; 1767 1768 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); 1769 } 1770 } /* while worklist */ 1771 1772 VEC_free (tree, heap, worklist); 1773 return true; 1774} 1775 1776 1777/* Function vect_can_advance_ivs_p 1778 1779 In case the number of iterations that LOOP iterates is unknown at compile 1780 time, an epilog loop will be generated, and the loop induction variables 1781 (IVs) will be "advanced" to the value they are supposed to take just before 1782 the epilog loop. Here we check that the access function of the loop IVs 1783 and the expression that represents the loop bound are simple enough. 1784 These restrictions will be relaxed in the future. */ 1785 1786static bool 1787vect_can_advance_ivs_p (loop_vec_info loop_vinfo) 1788{ 1789 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1790 basic_block bb = loop->header; 1791 tree phi; 1792 1793 /* Analyze phi functions of the loop header. */ 1794 1795 if (vect_print_dump_info (REPORT_DETAILS)) 1796 fprintf (vect_dump, "=== vect_can_advance_ivs_p ==="); 1797 1798 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1799 { 1800 tree access_fn = NULL; 1801 tree evolution_part; 1802 1803 if (vect_print_dump_info (REPORT_DETAILS)) 1804 { 1805 fprintf (vect_dump, "Analyze phi: "); 1806 print_generic_expr (vect_dump, phi, TDF_SLIM); 1807 } 1808 1809 /* Skip virtual phi's. The data dependences that are associated with 1810 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ 1811 1812 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) 1813 { 1814 if (vect_print_dump_info (REPORT_DETAILS)) 1815 fprintf (vect_dump, "virtual phi. skip."); 1816 continue; 1817 } 1818 1819 /* Skip reduction phis. */ 1820 1821 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) 1822 { 1823 if (vect_print_dump_info (REPORT_DETAILS)) 1824 fprintf (vect_dump, "reduc phi. skip."); 1825 continue; 1826 } 1827 1828 /* Analyze the evolution function. */ 1829 1830 access_fn = instantiate_parameters 1831 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); 1832 1833 if (!access_fn) 1834 { 1835 if (vect_print_dump_info (REPORT_DETAILS)) 1836 fprintf (vect_dump, "No Access function."); 1837 return false; 1838 } 1839 1840 if (vect_print_dump_info (REPORT_DETAILS)) 1841 { 1842 fprintf (vect_dump, "Access function of PHI: "); 1843 print_generic_expr (vect_dump, access_fn, TDF_SLIM); 1844 } 1845 1846 evolution_part = evolution_part_in_loop_num (access_fn, loop->num); 1847 1848 if (evolution_part == NULL_TREE) 1849 { 1850 if (vect_print_dump_info (REPORT_DETAILS)) 1851 fprintf (vect_dump, "No evolution."); 1852 return false; 1853 } 1854 1855 /* FORNOW: We do not transform initial conditions of IVs 1856 which evolution functions are a polynomial of degree >= 2. */ 1857 1858 if (tree_is_chrec (evolution_part)) 1859 return false; 1860 } 1861 1862 return true; 1863} 1864 1865 1866/* Function vect_get_loop_niters. 1867 1868 Determine how many iterations the loop is executed. 1869 If an expression that represents the number of iterations 1870 can be constructed, place it in NUMBER_OF_ITERATIONS. 1871 Return the loop exit condition. */ 1872 1873static tree 1874vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) 1875{ 1876 tree niters; 1877 1878 if (vect_print_dump_info (REPORT_DETAILS)) 1879 fprintf (vect_dump, "=== get_loop_niters ==="); 1880 1881 niters = number_of_iterations_in_loop (loop); 1882 1883 if (niters != NULL_TREE 1884 && niters != chrec_dont_know) 1885 { 1886 *number_of_iterations = niters; 1887 1888 if (vect_print_dump_info (REPORT_DETAILS)) 1889 { 1890 fprintf (vect_dump, "==> get_loop_niters:" ); 1891 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM); 1892 } 1893 } 1894 1895 return get_loop_exit_condition (loop); 1896} 1897 1898 1899/* Function vect_analyze_loop_form. 1900 1901 Verify the following restrictions (some may be relaxed in the future): 1902 - it's an inner-most loop 1903 - number of BBs = 2 (which are the loop header and the latch) 1904 - the loop has a pre-header 1905 - the loop has a single entry and exit 1906 - the loop exit condition is simple enough, and the number of iterations 1907 can be analyzed (a countable loop). */ 1908 1909static loop_vec_info 1910vect_analyze_loop_form (struct loop *loop) 1911{ 1912 loop_vec_info loop_vinfo; 1913 tree loop_cond; 1914 tree number_of_iterations = NULL; 1915 1916 if (vect_print_dump_info (REPORT_DETAILS)) 1917 fprintf (vect_dump, "=== vect_analyze_loop_form ==="); 1918 1919 if (loop->inner) 1920 { 1921 if (vect_print_dump_info (REPORT_OUTER_LOOPS)) 1922 fprintf (vect_dump, "not vectorized: nested loop."); 1923 return NULL; 1924 } 1925 1926 if (!loop->single_exit 1927 || loop->num_nodes != 2 1928 || EDGE_COUNT (loop->header->preds) != 2) 1929 { 1930 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1931 { 1932 if (!loop->single_exit) 1933 fprintf (vect_dump, "not vectorized: multiple exits."); 1934 else if (loop->num_nodes != 2) 1935 fprintf (vect_dump, "not vectorized: too many BBs in loop."); 1936 else if (EDGE_COUNT (loop->header->preds) != 2) 1937 fprintf (vect_dump, "not vectorized: too many incoming edges."); 1938 } 1939 1940 return NULL; 1941 } 1942 1943 /* We assume that the loop exit condition is at the end of the loop. i.e, 1944 that the loop is represented as a do-while (with a proper if-guard 1945 before the loop if needed), where the loop header contains all the 1946 executable statements, and the latch is empty. */ 1947 if (!empty_block_p (loop->latch) 1948 || phi_nodes (loop->latch)) 1949 { 1950 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1951 fprintf (vect_dump, "not vectorized: unexpected loop form."); 1952 return NULL; 1953 } 1954 1955 /* Make sure there exists a single-predecessor exit bb: */ 1956 if (!single_pred_p (loop->single_exit->dest)) 1957 { 1958 edge e = loop->single_exit; 1959 if (!(e->flags & EDGE_ABNORMAL)) 1960 { 1961 split_loop_exit_edge (e); 1962 if (vect_print_dump_info (REPORT_DETAILS)) 1963 fprintf (vect_dump, "split exit edge."); 1964 } 1965 else 1966 { 1967 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1968 fprintf (vect_dump, "not vectorized: abnormal loop exit edge."); 1969 return NULL; 1970 } 1971 } 1972 1973 if (empty_block_p (loop->header)) 1974 { 1975 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1976 fprintf (vect_dump, "not vectorized: empty loop."); 1977 return NULL; 1978 } 1979 1980 loop_cond = vect_get_loop_niters (loop, &number_of_iterations); 1981 if (!loop_cond) 1982 { 1983 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1984 fprintf (vect_dump, "not vectorized: complicated exit condition."); 1985 return NULL; 1986 } 1987 1988 if (!number_of_iterations) 1989 { 1990 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1991 fprintf (vect_dump, 1992 "not vectorized: number of iterations cannot be computed."); 1993 return NULL; 1994 } 1995 1996 if (chrec_contains_undetermined (number_of_iterations)) 1997 { 1998 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) 1999 fprintf (vect_dump, "Infinite number of iterations."); 2000 return false; 2001 } 2002 2003 loop_vinfo = new_loop_vec_info (loop); 2004 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; 2005 2006 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 2007 { 2008 if (vect_print_dump_info (REPORT_DETAILS)) 2009 { 2010 fprintf (vect_dump, "Symbolic number of iterations is "); 2011 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS); 2012 } 2013 } 2014 else 2015 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) 2016 { 2017 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) 2018 fprintf (vect_dump, "not vectorized: number of iterations = 0."); 2019 return NULL; 2020 } 2021 2022 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; 2023 2024 return loop_vinfo; 2025} 2026 2027 2028/* Function vect_analyze_loop. 2029 2030 Apply a set of analyses on LOOP, and create a loop_vec_info struct 2031 for it. The different analyses will record information in the 2032 loop_vec_info struct. */ 2033loop_vec_info 2034vect_analyze_loop (struct loop *loop) 2035{ 2036 bool ok; 2037 loop_vec_info loop_vinfo; 2038 2039 if (vect_print_dump_info (REPORT_DETAILS)) 2040 fprintf (vect_dump, "===== analyze_loop_nest ====="); 2041 2042 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ 2043 2044 loop_vinfo = vect_analyze_loop_form (loop); 2045 if (!loop_vinfo) 2046 { 2047 if (vect_print_dump_info (REPORT_DETAILS)) 2048 fprintf (vect_dump, "bad loop form."); 2049 return NULL; 2050 } 2051 2052 /* Find all data references in the loop (which correspond to vdefs/vuses) 2053 and analyze their evolution in the loop. 2054 2055 FORNOW: Handle only simple, array references, which 2056 alignment can be forced, and aligned pointer-references. */ 2057 2058 ok = vect_analyze_data_refs (loop_vinfo); 2059 if (!ok) 2060 { 2061 if (vect_print_dump_info (REPORT_DETAILS)) 2062 fprintf (vect_dump, "bad data references."); 2063 destroy_loop_vec_info (loop_vinfo); 2064 return NULL; 2065 } 2066 2067 /* Classify all cross-iteration scalar data-flow cycles. 2068 Cross-iteration cycles caused by virtual phis are analyzed separately. */ 2069 2070 vect_analyze_scalar_cycles (loop_vinfo); 2071 2072 vect_pattern_recog (loop_vinfo); 2073 2074 /* Data-flow analysis to detect stmts that do not need to be vectorized. */ 2075 2076 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); 2077 if (!ok) 2078 { 2079 if (vect_print_dump_info (REPORT_DETAILS)) 2080 fprintf (vect_dump, "unexpected pattern."); 2081 destroy_loop_vec_info (loop_vinfo); 2082 return NULL; 2083 } 2084 2085 /* Analyze the alignment of the data-refs in the loop. 2086 Fail if a data reference is found that cannot be vectorized. */ 2087 2088 ok = vect_analyze_data_refs_alignment (loop_vinfo); 2089 if (!ok) 2090 { 2091 if (vect_print_dump_info (REPORT_DETAILS)) 2092 fprintf (vect_dump, "bad data alignment."); 2093 destroy_loop_vec_info (loop_vinfo); 2094 return NULL; 2095 } 2096 2097 ok = vect_determine_vectorization_factor (loop_vinfo); 2098 if (!ok) 2099 { 2100 if (vect_print_dump_info (REPORT_DETAILS)) 2101 fprintf (vect_dump, "can't determine vectorization factor."); 2102 destroy_loop_vec_info (loop_vinfo); 2103 return NULL; 2104 } 2105 2106 /* Analyze data dependences between the data-refs in the loop. 2107 FORNOW: fail at the first data dependence that we encounter. */ 2108 2109 ok = vect_analyze_data_ref_dependences (loop_vinfo); 2110 if (!ok) 2111 { 2112 if (vect_print_dump_info (REPORT_DETAILS)) 2113 fprintf (vect_dump, "bad data dependence."); 2114 destroy_loop_vec_info (loop_vinfo); 2115 return NULL; 2116 } 2117 2118 /* Analyze the access patterns of the data-refs in the loop (consecutive, 2119 complex, etc.). FORNOW: Only handle consecutive access pattern. */ 2120 2121 ok = vect_analyze_data_ref_accesses (loop_vinfo); 2122 if (!ok) 2123 { 2124 if (vect_print_dump_info (REPORT_DETAILS)) 2125 fprintf (vect_dump, "bad data access."); 2126 destroy_loop_vec_info (loop_vinfo); 2127 return NULL; 2128 } 2129 2130 /* This pass will decide on using loop versioning and/or loop peeling in 2131 order to enhance the alignment of data references in the loop. */ 2132 2133 ok = vect_enhance_data_refs_alignment (loop_vinfo); 2134 if (!ok) 2135 { 2136 if (vect_print_dump_info (REPORT_DETAILS)) 2137 fprintf (vect_dump, "bad data alignment."); 2138 destroy_loop_vec_info (loop_vinfo); 2139 return NULL; 2140 } 2141 2142 /* Scan all the operations in the loop and make sure they are 2143 vectorizable. */ 2144 2145 ok = vect_analyze_operations (loop_vinfo); 2146 if (!ok) 2147 { 2148 if (vect_print_dump_info (REPORT_DETAILS)) 2149 fprintf (vect_dump, "bad operation or unsupported loop bound."); 2150 destroy_loop_vec_info (loop_vinfo); 2151 return NULL; 2152 } 2153 2154 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; 2155 2156 return loop_vinfo; 2157}
|