Deleted Added
full compact
1/* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
28#include "target.h"
29#include "basic-block.h"
30#include "diagnostic.h"
31#include "tree-flow.h"
32#include "tree-dump.h"
33#include "timevar.h"
34#include "cfgloop.h"
35#include "expr.h"
36#include "optabs.h"
37#include "params.h"
38#include "tree-chrec.h"
39#include "tree-data-ref.h"
40#include "tree-scalar-evolution.h"
41#include "tree-vectorizer.h"
42
43/* Main analysis functions. */
44static loop_vec_info vect_analyze_loop_form (struct loop *);
45static bool vect_analyze_data_refs (loop_vec_info);
46static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47static void vect_analyze_scalar_cycles (loop_vec_info);
48static bool vect_analyze_data_ref_accesses (loop_vec_info);
49static bool vect_analyze_data_ref_dependences (loop_vec_info);
50static bool vect_analyze_data_refs_alignment (loop_vec_info);
51static bool vect_compute_data_refs_alignment (loop_vec_info);
52static bool vect_enhance_data_refs_alignment (loop_vec_info);
53static bool vect_analyze_operations (loop_vec_info);
54static bool vect_determine_vectorization_factor (loop_vec_info);
55
56/* Utility functions for the analyses. */
57static bool exist_non_indexing_operands_for_use_p (tree, tree);
58static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
59static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
60static tree vect_get_loop_niters (struct loop *, tree *);
61static bool vect_analyze_data_ref_dependence
62 (struct data_dependence_relation *, loop_vec_info);
63static bool vect_compute_data_ref_alignment (struct data_reference *);
64static bool vect_analyze_data_ref_access (struct data_reference *);
65static bool vect_can_advance_ivs_p (loop_vec_info);
66static void vect_update_misalignment_for_peel
67 (struct data_reference *, struct data_reference *, int npeel);
68
69
70/* Function vect_determine_vectorization_factor
71
72 Determine the vectorization factor (VF). VF is the number of data elements
73 that are operated upon in parallel in a single iteration of the vectorized
74 loop. For example, when vectorizing a loop that operates on 4byte elements,
75 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
76 elements can fit in a single vector register.
77
78 We currently support vectorization of loops in which all types operated upon
79 are of the same size. Therefore this function currently sets VF according to
80 the size of the types operated upon, and fails if there are multiple sizes
81 in the loop.
82
83 VF is also the factor by which the loop iterations are strip-mined, e.g.:
84 original loop:
85 for (i=0; i<N; i++){
86 a[i] = b[i] + c[i];
87 }
88
89 vectorized loop:
90 for (i=0; i<N; i+=VF){
91 a[i:VF] = b[i:VF] + c[i:VF];
92 }
93*/
94
95static bool
96vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
97{
98 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
99 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
100 int nbbs = loop->num_nodes;
101 block_stmt_iterator si;
102 unsigned int vectorization_factor = 0;
103 int i;
104 tree scalar_type;
105
106 if (vect_print_dump_info (REPORT_DETAILS))
107 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
108
109 for (i = 0; i < nbbs; i++)
110 {
111 basic_block bb = bbs[i];
112
113 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
114 {
115 tree stmt = bsi_stmt (si);
116 unsigned int nunits;
117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
118 tree vectype;
119
120 if (vect_print_dump_info (REPORT_DETAILS))
121 {
122 fprintf (vect_dump, "==> examining statement: ");
123 print_generic_expr (vect_dump, stmt, TDF_SLIM);
124 }
125
126 gcc_assert (stmt_info);
127 /* skip stmts which do not need to be vectorized. */
128 if (!STMT_VINFO_RELEVANT_P (stmt_info)
129 && !STMT_VINFO_LIVE_P (stmt_info))
130 {
131 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "skip.");
133 continue;
134 }
135
136 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
137 {
138 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
139 {
140 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
141 print_generic_expr (vect_dump, stmt, TDF_SLIM);
142 }
143 return false;
144 }
145
146 if (STMT_VINFO_VECTYPE (stmt_info))
147 {
148 vectype = STMT_VINFO_VECTYPE (stmt_info);
149 scalar_type = TREE_TYPE (vectype);
150 }
151 else
152 {
153 if (STMT_VINFO_DATA_REF (stmt_info))
154 scalar_type =
155 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
156 else if (TREE_CODE (stmt) == MODIFY_EXPR)
157 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
158 else
159 scalar_type = TREE_TYPE (stmt);
160
161 if (vect_print_dump_info (REPORT_DETAILS))
162 {
163 fprintf (vect_dump, "get vectype for scalar type: ");
164 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
165 }
166
167 vectype = get_vectype_for_scalar_type (scalar_type);
168 if (!vectype)
169 {
170 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
171 {
172 fprintf (vect_dump,
173 "not vectorized: unsupported data-type ");
174 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
175 }
176 return false;
177 }
178 STMT_VINFO_VECTYPE (stmt_info) = vectype;
179 }
180
181 if (vect_print_dump_info (REPORT_DETAILS))
182 {
183 fprintf (vect_dump, "vectype: ");
184 print_generic_expr (vect_dump, vectype, TDF_SLIM);
185 }
186
187 nunits = TYPE_VECTOR_SUBPARTS (vectype);
188 if (vect_print_dump_info (REPORT_DETAILS))
189 fprintf (vect_dump, "nunits = %d", nunits);
190
191 if (vectorization_factor)
192 {
193 /* FORNOW: don't allow mixed units.
194 This restriction will be relaxed in the future. */
195 if (nunits != vectorization_factor)
196 {
197 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
198 fprintf (vect_dump, "not vectorized: mixed data-types");
199 return false;
200 }
201 }
202 else
203 vectorization_factor = nunits;
204
205 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
206 * vectorization_factor == UNITS_PER_SIMD_WORD);
207 }
208 }
209
210 /* TODO: Analyze cost. Decide if worth while to vectorize. */
211
212 if (vectorization_factor <= 1)
213 {
214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
215 fprintf (vect_dump, "not vectorized: unsupported data-type");
216 return false;
217 }
218 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
219
220 return true;
221}
222
223
224/* Function vect_analyze_operations.
225
226 Scan the loop stmts and make sure they are all vectorizable. */
227
228static bool
229vect_analyze_operations (loop_vec_info loop_vinfo)
230{
231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
232 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
233 int nbbs = loop->num_nodes;
234 block_stmt_iterator si;
235 unsigned int vectorization_factor = 0;
236 int i;
237 bool ok;
238 tree phi;
239 stmt_vec_info stmt_info;
240 bool need_to_vectorize = false;
241
242 if (vect_print_dump_info (REPORT_DETAILS))
243 fprintf (vect_dump, "=== vect_analyze_operations ===");
244
245 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
246 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
247
248 for (i = 0; i < nbbs; i++)
249 {
250 basic_block bb = bbs[i];
251
252 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
253 {
254 stmt_info = vinfo_for_stmt (phi);
255 if (vect_print_dump_info (REPORT_DETAILS))
256 {
257 fprintf (vect_dump, "examining phi: ");
258 print_generic_expr (vect_dump, phi, TDF_SLIM);
259 }
260
261 gcc_assert (stmt_info);
262
263 if (STMT_VINFO_LIVE_P (stmt_info))
264 {
265 /* FORNOW: not yet supported. */
266 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
267 fprintf (vect_dump, "not vectorized: value used after loop.");
268 return false;
269 }
270
271 if (STMT_VINFO_RELEVANT_P (stmt_info))
272 {
273 /* Most likely a reduction-like computation that is used
274 in the loop. */
275 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
276 fprintf (vect_dump, "not vectorized: unsupported pattern.");
277 return false;
278 }
279 }
280
281 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
282 {
283 tree stmt = bsi_stmt (si);
284 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
285
286 if (vect_print_dump_info (REPORT_DETAILS))
287 {
288 fprintf (vect_dump, "==> examining statement: ");
289 print_generic_expr (vect_dump, stmt, TDF_SLIM);
290 }
291
292 gcc_assert (stmt_info);
293
294 /* skip stmts which do not need to be vectorized.
295 this is expected to include:
296 - the COND_EXPR which is the loop exit condition
297 - any LABEL_EXPRs in the loop
298 - computations that are used only for array indexing or loop
299 control */
300
301 if (!STMT_VINFO_RELEVANT_P (stmt_info)
302 && !STMT_VINFO_LIVE_P (stmt_info))
303 {
304 if (vect_print_dump_info (REPORT_DETAILS))
305 fprintf (vect_dump, "irrelevant.");
306 continue;
307 }
308
309 if (STMT_VINFO_RELEVANT_P (stmt_info))
310 {
311 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
312 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
313
314 ok = (vectorizable_operation (stmt, NULL, NULL)
315 || vectorizable_assignment (stmt, NULL, NULL)
316 || vectorizable_load (stmt, NULL, NULL)
317 || vectorizable_store (stmt, NULL, NULL)
318 || vectorizable_condition (stmt, NULL, NULL));
319
320 if (!ok)
321 {
322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
323 {
324 fprintf (vect_dump,
325 "not vectorized: relevant stmt not supported: ");
326 print_generic_expr (vect_dump, stmt, TDF_SLIM);
327 }
328 return false;
329 }
330 need_to_vectorize = true;
331 }
332
333 if (STMT_VINFO_LIVE_P (stmt_info))
334 {
335 ok = vectorizable_reduction (stmt, NULL, NULL);
336
337 if (ok)
338 need_to_vectorize = true;
339 else
340 ok = vectorizable_live_operation (stmt, NULL, NULL);
341
342 if (!ok)
343 {
344 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
345 {
346 fprintf (vect_dump,
347 "not vectorized: live stmt not supported: ");
348 print_generic_expr (vect_dump, stmt, TDF_SLIM);
349 }
350 return false;
351 }
352 }
353 } /* stmts in bb */
354 } /* bbs */
355
356 /* TODO: Analyze cost. Decide if worth while to vectorize. */
357
358 /* All operations in the loop are either irrelevant (deal with loop
359 control, or dead), or only used outside the loop and can be moved
360 out of the loop (e.g. invariants, inductions). The loop can be
361 optimized away by scalar optimizations. We're better off not
362 touching this loop. */
363 if (!need_to_vectorize)
364 {
365 if (vect_print_dump_info (REPORT_DETAILS))
366 fprintf (vect_dump,
367 "All the computation can be taken out of the loop.");
368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
369 fprintf (vect_dump,
370 "not vectorized: redundant loop. no profit to vectorize.");
371 return false;
372 }
373
374 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
375 && vect_print_dump_info (REPORT_DETAILS))
376 fprintf (vect_dump,
377 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
378 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
379
380 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
381 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
382 {
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384 fprintf (vect_dump, "not vectorized: iteration count too small.");
385 return false;
386 }
387
388 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
389 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
390 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
391 {
392 if (vect_print_dump_info (REPORT_DETAILS))
393 fprintf (vect_dump, "epilog loop required.");
394 if (!vect_can_advance_ivs_p (loop_vinfo))
395 {
396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
397 fprintf (vect_dump,
398 "not vectorized: can't create epilog loop 1.");
399 return false;
400 }
401 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
402 {
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
404 fprintf (vect_dump,
405 "not vectorized: can't create epilog loop 2.");
406 return false;
407 }
408 }
409
410 return true;
411}
412
413
414/* Function exist_non_indexing_operands_for_use_p
415
416 USE is one of the uses attached to STMT. Check if USE is
417 used in STMT for anything other than indexing an array. */
418
419static bool
420exist_non_indexing_operands_for_use_p (tree use, tree stmt)
421{
422 tree operand;
423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
424
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
430
431 /* STMT has a data_ref. FORNOW this means that its of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
436
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
440
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
443
444 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
445 return false;
446
447 operand = TREE_OPERAND (stmt, 1);
448
449 if (TREE_CODE (operand) != SSA_NAME)
450 return false;
451
452 if (operand == use)
453 return true;
454
455 return false;
456}
457
458
459/* Function vect_analyze_scalar_cycles.
460
461 Examine the cross iteration def-use cycles of scalar variables, by
462 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
463 following: invariant, induction, reduction, unknown.
464
465 Some forms of scalar cycles are not yet supported.
466
467 Example1: reduction: (unsupported yet)
468
469 loop1:
470 for (i=0; i<N; i++)
471 sum += a[i];
472
473 Example2: induction: (unsupported yet)
474
475 loop2:
476 for (i=0; i<N; i++)
477 a[i] = i;
478
479 Note: the following loop *is* vectorizable:
480
481 loop3:
482 for (i=0; i<N; i++)
483 a[i] = b[i];
484
485 even though it has a def-use cycle caused by the induction variable i:
486
487 loop: i_2 = PHI (i_0, i_1)
488 a[i_2] = ...;
489 i_1 = i_2 + 1;
490 GOTO loop;
491
492 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
493 it does not need to be vectorized because it is only used for array
494 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
495 loop2 on the other hand is relevant (it is being written to memory).
496*/
497
498static void
499vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
500{
501 tree phi;
502 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
503 basic_block bb = loop->header;
504 tree dummy;
505
506 if (vect_print_dump_info (REPORT_DETAILS))
507 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
508
509 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
510 {
511 tree access_fn = NULL;
512 tree def = PHI_RESULT (phi);
513 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
514 tree reduc_stmt;
515
516 if (vect_print_dump_info (REPORT_DETAILS))
517 {
518 fprintf (vect_dump, "Analyze phi: ");
519 print_generic_expr (vect_dump, phi, TDF_SLIM);
520 }
521
522 /* Skip virtual phi's. The data dependences that are associated with
523 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
524
525 if (!is_gimple_reg (SSA_NAME_VAR (def)))
526 {
527 if (vect_print_dump_info (REPORT_DETAILS))
528 fprintf (vect_dump, "virtual phi. skip.");
529 continue;
530 }
531
532 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
533
534 /* Analyze the evolution function. */
535
536 access_fn = analyze_scalar_evolution (loop, def);
537
538 if (!access_fn)
539 continue;
540
541 if (vect_print_dump_info (REPORT_DETAILS))
542 {
543 fprintf (vect_dump, "Access function of PHI: ");
544 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
545 }
546
547 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
548 {
549 if (vect_print_dump_info (REPORT_DETAILS))
550 fprintf (vect_dump, "Detected induction.");
551 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
552 continue;
553 }
554
555 /* TODO: handle invariant phis */
556
557 reduc_stmt = vect_is_simple_reduction (loop, phi);
558 if (reduc_stmt)
559 {
560 if (vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump, "Detected reduction.");
562 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
563 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
564 vect_reduction_def;
565 }
566 else
567 if (vect_print_dump_info (REPORT_DETAILS))
568 fprintf (vect_dump, "Unknown def-use cycle pattern.");
569
570 }
571
572 return;
573}
574
575
576/* Function vect_analyze_data_ref_dependence.
577
578 Return TRUE if there (might) exist a dependence between a memory-reference
579 DRA and a memory-reference DRB. */
580
581static bool
582vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
583 loop_vec_info loop_vinfo)
584{
585 unsigned int i;
586 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
587 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
588 struct data_reference *dra = DDR_A (ddr);
589 struct data_reference *drb = DDR_B (ddr);
590 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
591 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
592 lambda_vector dist_v;
593 unsigned int loop_depth;
594
595 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
596 return false;
597
598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
599 {
600 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
601 {
602 fprintf (vect_dump,
603 "not vectorized: can't determine dependence between ");
604 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
605 fprintf (vect_dump, " and ");
606 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
607 }
608 return true;
609 }
610
611 if (DDR_NUM_DIST_VECTS (ddr) == 0)
612 {
613 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
614 {
615 fprintf (vect_dump, "not vectorized: bad dist vector for ");
616 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
617 fprintf (vect_dump, " and ");
618 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
619 }
620 return true;
621 }
622
623 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
624 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
625 {
626 int dist = dist_v[loop_depth];
627
628 if (vect_print_dump_info (REPORT_DR_DETAILS))
629 fprintf (vect_dump, "dependence distance = %d.", dist);
630
631 /* Same loop iteration. */
632 if (dist % vectorization_factor == 0)
633 {
634 /* Two references with distance zero have the same alignment. */
635 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
636 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
637 if (vect_print_dump_info (REPORT_ALIGNMENT))
638 fprintf (vect_dump, "accesses have the same alignment.");
639 if (vect_print_dump_info (REPORT_DR_DETAILS))
640 {
641 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
642 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
643 fprintf (vect_dump, " and ");
644 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 }
646 continue;
647 }
648
649 if (abs (dist) >= vectorization_factor)
650 {
651 /* Dependence distance does not create dependence, as far as vectorization
652 is concerned, in this case. */
653 if (vect_print_dump_info (REPORT_DR_DETAILS))
654 fprintf (vect_dump, "dependence distance >= VF.");
655 continue;
656 }
657
658 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
659 {
660 fprintf (vect_dump,
661 "not vectorized: possible dependence between data-refs ");
662 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663 fprintf (vect_dump, " and ");
664 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
665 }
666
667 return true;
668 }
669
670 return false;
671}
672
673
674/* Function vect_analyze_data_ref_dependences.
675
676 Examine all the data references in the loop, and make sure there do not
677 exist any data dependences between them. */
678
679static bool
680vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
681{
682 unsigned int i;
683 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
684 struct data_dependence_relation *ddr;
685
686 if (vect_print_dump_info (REPORT_DETAILS))
687 fprintf (vect_dump, "=== vect_analyze_dependences ===");
688
689 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
690 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
691 return false;
692
693 return true;
694}
695
696
697/* Function vect_compute_data_ref_alignment
698
699 Compute the misalignment of the data reference DR.
700
701 Output:
702 1. If during the misalignment computation it is found that the data reference
703 cannot be vectorized then false is returned.
704 2. DR_MISALIGNMENT (DR) is defined.
705
706 FOR NOW: No analysis is actually performed. Misalignment is calculated
707 only for trivial cases. TODO. */
708
709static bool
710vect_compute_data_ref_alignment (struct data_reference *dr)
711{
712 tree stmt = DR_STMT (dr);
713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
714 tree ref = DR_REF (dr);
715 tree vectype;
716 tree base, base_addr;
717 bool base_aligned;
718 tree misalign;
719 tree aligned_to, alignment;
720
721 if (vect_print_dump_info (REPORT_DETAILS))
722 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
723
724 /* Initialize misalignment to unknown. */
725 DR_MISALIGNMENT (dr) = -1;
726
727 misalign = DR_OFFSET_MISALIGNMENT (dr);
728 aligned_to = DR_ALIGNED_TO (dr);
729 base_addr = DR_BASE_ADDRESS (dr);
730 base = build_fold_indirect_ref (base_addr);
731 vectype = STMT_VINFO_VECTYPE (stmt_info);
732 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
733
734 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
735 || !misalign)
736 {
737 if (vect_print_dump_info (REPORT_DETAILS))
738 {
739 fprintf (vect_dump, "Unknown alignment for access: ");
740 print_generic_expr (vect_dump, base, TDF_SLIM);
741 }
742 return true;
743 }
744
745 if ((DECL_P (base)
746 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
747 alignment) >= 0)
748 || (TREE_CODE (base_addr) == SSA_NAME
749 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
750 TREE_TYPE (base_addr)))),
751 alignment) >= 0))
752 base_aligned = true;
753 else
754 base_aligned = false;
755
756 if (!base_aligned)
757 {
758 /* Do not change the alignment of global variables if
759 flag_section_anchors is enabled. */
760 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
761 || (TREE_STATIC (base) && flag_section_anchors))
762 {
763 if (vect_print_dump_info (REPORT_DETAILS))
764 {
765 fprintf (vect_dump, "can't force alignment of ref: ");
766 print_generic_expr (vect_dump, ref, TDF_SLIM);
767 }
768 return true;
769 }
770
771 /* Force the alignment of the decl.
772 NOTE: This is the only change to the code we make during
773 the analysis phase, before deciding to vectorize the loop. */
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "force alignment");
776 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
777 DECL_USER_ALIGN (base) = 1;
778 }
779
780 /* At this point we assume that the base is aligned. */
781 gcc_assert (base_aligned
782 || (TREE_CODE (base) == VAR_DECL
783 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
784
785 /* Modulo alignment. */
786 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
787
788 if (!host_integerp (misalign, 1))
789 {
790 /* Negative or overflowed misalignment value. */
791 if (vect_print_dump_info (REPORT_DETAILS))
792 fprintf (vect_dump, "unexpected misalign value");
793 return false;
794 }
795
796 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
797
798 if (vect_print_dump_info (REPORT_DETAILS))
799 {
800 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
801 print_generic_expr (vect_dump, ref, TDF_SLIM);
802 }
803
804 return true;
805}
806
807
808/* Function vect_compute_data_refs_alignment
809
810 Compute the misalignment of data references in the loop.
811 Return FALSE if a data reference is found that cannot be vectorized. */
812
813static bool
814vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
815{
816 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
817 struct data_reference *dr;
818 unsigned int i;
819
820 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
821 if (!vect_compute_data_ref_alignment (dr))
822 return false;
823
824 return true;
825}
826
827
828/* Function vect_update_misalignment_for_peel
829
830 DR - the data reference whose misalignment is to be adjusted.
831 DR_PEEL - the data reference whose misalignment is being made
832 zero in the vector loop by the peel.
833 NPEEL - the number of iterations in the peel loop if the misalignment
834 of DR_PEEL is known at compile time. */
835
836static void
837vect_update_misalignment_for_peel (struct data_reference *dr,
838 struct data_reference *dr_peel, int npeel)
839{
840 unsigned int i;
841 int drsize;
842 VEC(dr_p,heap) *same_align_drs;
843 struct data_reference *current_dr;
844
845 if (known_alignment_for_access_p (dr)
846 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
847 {
848 DR_MISALIGNMENT (dr) = 0;
849 return;
850 }
851
852 /* It can be assumed that the data refs with the same alignment as dr_peel
853 are aligned in the vector loop. */
854 same_align_drs
855 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
856 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
857 {
858 if (current_dr != dr)
859 continue;
860 gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
861 DR_MISALIGNMENT (dr) = 0;
862 return;
863 }
864
865 if (known_alignment_for_access_p (dr)
866 && known_alignment_for_access_p (dr_peel))
867 {
868 drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869 DR_MISALIGNMENT (dr) += npeel * drsize;
870 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
871 return;
872 }
873
874 DR_MISALIGNMENT (dr) = -1;
875}
876
877
878/* Function vect_verify_datarefs_alignment
879
880 Return TRUE if all data references in the loop can be
881 handled with respect to alignment. */
882
883static bool
884vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
885{
886 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
887 struct data_reference *dr;
888 enum dr_alignment_support supportable_dr_alignment;
889 unsigned int i;
890
891 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
892 {
893 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
894 if (!supportable_dr_alignment)
895 {
896 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
897 {
898 if (DR_IS_READ (dr))
899 fprintf (vect_dump,
900 "not vectorized: unsupported unaligned load.");
901 else
902 fprintf (vect_dump,
903 "not vectorized: unsupported unaligned store.");
904 }
905 return false;
906 }
907 if (supportable_dr_alignment != dr_aligned
908 && vect_print_dump_info (REPORT_ALIGNMENT))
909 fprintf (vect_dump, "Vectorizing an unaligned access.");
910 }
911 return true;
912}
913
914
915/* Function vector_alignment_reachable_p
916
917 Return true if vector alignment for DR is reachable by peeling
918 a few loop iterations. Return false otherwise. */
919
920static bool
921vector_alignment_reachable_p (struct data_reference *dr)
922{
923 tree stmt = DR_STMT (dr);
924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
925 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
926
927 /* If misalignment is known at the compile time then allow peeling
928 only if natural alignment is reachable through peeling. */
929 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
930 {
931 HOST_WIDE_INT elmsize =
932 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
933 if (vect_print_dump_info (REPORT_DETAILS))
934 {
935 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
936 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
937 }
938 if (DR_MISALIGNMENT (dr) % elmsize)
939 {
940 if (vect_print_dump_info (REPORT_DETAILS))
941 fprintf (vect_dump, "data size does not divide the misalignment.\n");
942 return false;
943 }
944 }
945
946 if (!known_alignment_for_access_p (dr))
947 {
948 tree type = (TREE_TYPE (DR_REF (dr)));
949 tree ba = DR_BASE_OBJECT (dr);
950 bool is_packed = false;
951
952 if (ba)
953 is_packed = contains_packed_reference (ba);
954
955 if (vect_print_dump_info (REPORT_DETAILS))
956 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
957 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
958 return true;
959 else
960 return false;
961 }
962
963 return true;
964}
965
966/* Function vect_enhance_data_refs_alignment
967
968 This pass will use loop versioning and loop peeling in order to enhance
969 the alignment of data references in the loop.
970
971 FOR NOW: we assume that whatever versioning/peeling takes place, only the
972 original loop is to be vectorized; Any other loops that are created by
973 the transformations performed in this pass - are not supposed to be
974 vectorized. This restriction will be relaxed.
975
976 This pass will require a cost model to guide it whether to apply peeling
977 or versioning or a combination of the two. For example, the scheme that
978 intel uses when given a loop with several memory accesses, is as follows:
979 choose one memory access ('p') which alignment you want to force by doing
980 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
981 other accesses are not necessarily aligned, or (2) use loop versioning to
982 generate one loop in which all accesses are aligned, and another loop in
983 which only 'p' is necessarily aligned.
984
985 ("Automatic Intra-Register Vectorization for the Intel Architecture",
986 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
987 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
988
989 Devising a cost model is the most critical aspect of this work. It will
990 guide us on which access to peel for, whether to use loop versioning, how
991 many versions to create, etc. The cost model will probably consist of
992 generic considerations as well as target specific considerations (on
993 powerpc for example, misaligned stores are more painful than misaligned
994 loads).
995
996 Here are the general steps involved in alignment enhancements:
997
998 -- original loop, before alignment analysis:
999 for (i=0; i<N; i++){
1000 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1001 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1002 }
1003
1004 -- After vect_compute_data_refs_alignment:
1005 for (i=0; i<N; i++){
1006 x = q[i]; # DR_MISALIGNMENT(q) = 3
1007 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1008 }
1009
1010 -- Possibility 1: we do loop versioning:
1011 if (p is aligned) {
1012 for (i=0; i<N; i++){ # loop 1A
1013 x = q[i]; # DR_MISALIGNMENT(q) = 3
1014 p[i] = y; # DR_MISALIGNMENT(p) = 0
1015 }
1016 }
1017 else {
1018 for (i=0; i<N; i++){ # loop 1B
1019 x = q[i]; # DR_MISALIGNMENT(q) = 3
1020 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1021 }
1022 }
1023
1024 -- Possibility 2: we do loop peeling:
1025 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1026 x = q[i];
1027 p[i] = y;
1028 }
1029 for (i = 3; i < N; i++){ # loop 2A
1030 x = q[i]; # DR_MISALIGNMENT(q) = 0
1031 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1032 }
1033
1034 -- Possibility 3: combination of loop peeling and versioning:
1035 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1036 x = q[i];
1037 p[i] = y;
1038 }
1039 if (p is aligned) {
1040 for (i = 3; i<N; i++){ # loop 3A
1041 x = q[i]; # DR_MISALIGNMENT(q) = 0
1042 p[i] = y; # DR_MISALIGNMENT(p) = 0
1043 }
1044 }
1045 else {
1046 for (i = 3; i<N; i++){ # loop 3B
1047 x = q[i]; # DR_MISALIGNMENT(q) = 0
1048 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1049 }
1050 }
1051
1052 These loops are later passed to loop_transform to be vectorized. The
1053 vectorizer will use the alignment information to guide the transformation
1054 (whether to generate regular loads/stores, or with special handling for
1055 misalignment). */
1056
1057static bool
1058vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1059{
1060 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061 enum dr_alignment_support supportable_dr_alignment;
1062 struct data_reference *dr0 = NULL;
1063 struct data_reference *dr;
1064 unsigned int i;
1065 bool do_peeling = false;
1066 bool do_versioning = false;
1067 bool stat;
1068
1069 /* While cost model enhancements are expected in the future, the high level
1070 view of the code at this time is as follows:
1071
1072 A) If there is a misaligned write then see if peeling to align this write
1073 can make all data references satisfy vect_supportable_dr_alignment.
1074 If so, update data structures as needed and return true. Note that
1075 at this time vect_supportable_dr_alignment is known to return false
1076 for a a misaligned write.
1077
1078 B) If peeling wasn't possible and there is a data reference with an
1079 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1080 then see if loop versioning checks can be used to make all data
1081 references satisfy vect_supportable_dr_alignment. If so, update
1082 data structures as needed and return true.
1083
1084 C) If neither peeling nor versioning were successful then return false if
1085 any data reference does not satisfy vect_supportable_dr_alignment.
1086
1087 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1088
1089 Note, Possibility 3 above (which is peeling and versioning together) is not
1090 being done at this time. */
1091
1092 /* (1) Peeling to force alignment. */
1093
1094 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1095 Considerations:
1096 + How many accesses will become aligned due to the peeling
1097 - How many accesses will become unaligned due to the peeling,
1098 and the cost of misaligned accesses.
1099 - The cost of peeling (the extra runtime checks, the increase
1100 in code size).
1101
1102 The scheme we use FORNOW: peel to force the alignment of the first
1103 misaligned store in the loop.
1104 Rationale: misaligned stores are not yet supported.
1105
1106 TODO: Use a cost model. */
1107
1108 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1109 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1110 {
1059 dr0 = dr;
1060 do_peeling = true;
1111 do_peeling = vector_alignment_reachable_p (dr);
1112 if (do_peeling)
1113 dr0 = dr;
1114 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1115 fprintf (vect_dump, "vector alignment may not be reachable");
1116 break;
1117 }
1118
1119 /* Often peeling for alignment will require peeling for loop-bound, which in
1120 turn requires that we know how to adjust the loop ivs after the loop. */
1121 if (!vect_can_advance_ivs_p (loop_vinfo))
1122 do_peeling = false;
1123
1124 if (do_peeling)
1125 {
1126 int mis;
1127 int npeel = 0;
1128
1129 if (known_alignment_for_access_p (dr0))
1130 {
1131 /* Since it's known at compile time, compute the number of iterations
1132 in the peeled loop (the peeling factor) for use in updating
1133 DR_MISALIGNMENT values. The peeling factor is the vectorization
1134 factor minus the misalignment as an element count. */
1135 mis = DR_MISALIGNMENT (dr0);
1136 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1137 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1138 }
1139
1140 /* Ensure that all data refs can be vectorized after the peel. */
1141 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1142 {
1143 int save_misalignment;
1144
1145 if (dr == dr0)
1146 continue;
1147
1148 save_misalignment = DR_MISALIGNMENT (dr);
1149 vect_update_misalignment_for_peel (dr, dr0, npeel);
1150 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1151 DR_MISALIGNMENT (dr) = save_misalignment;
1152
1153 if (!supportable_dr_alignment)
1154 {
1155 do_peeling = false;
1156 break;
1157 }
1158 }
1159
1160 if (do_peeling)
1161 {
1162 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1163 If the misalignment of DR_i is identical to that of dr0 then set
1164 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1165 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1166 by the peeling factor times the element size of DR_i (MOD the
1167 vectorization factor times the size). Otherwise, the
1168 misalignment of DR_i must be set to unknown. */
1169 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1170 if (dr != dr0)
1171 vect_update_misalignment_for_peel (dr, dr0, npeel);
1172
1173 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1174 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1175 DR_MISALIGNMENT (dr0) = 0;
1176 if (vect_print_dump_info (REPORT_ALIGNMENT))
1177 fprintf (vect_dump, "Alignment of access forced using peeling.");
1178
1179 if (vect_print_dump_info (REPORT_DETAILS))
1180 fprintf (vect_dump, "Peeling for alignment will be applied.");
1181
1182 stat = vect_verify_datarefs_alignment (loop_vinfo);
1183 gcc_assert (stat);
1184 return stat;
1185 }
1186 }
1187
1188
1189 /* (2) Versioning to force alignment. */
1190
1191 /* Try versioning if:
1192 1) flag_tree_vect_loop_version is TRUE
1193 2) optimize_size is FALSE
1194 3) there is at least one unsupported misaligned data ref with an unknown
1195 misalignment, and
1196 4) all misaligned data refs with a known misalignment are supported, and
1197 5) the number of runtime alignment checks is within reason. */
1198
1199 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1200
1201 if (do_versioning)
1202 {
1203 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1204 {
1205 if (aligned_access_p (dr))
1206 continue;
1207
1208 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1209
1210 if (!supportable_dr_alignment)
1211 {
1212 tree stmt;
1213 int mask;
1214 tree vectype;
1215
1216 if (known_alignment_for_access_p (dr)
1217 || VEC_length (tree,
1218 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1219 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1220 {
1221 do_versioning = false;
1222 break;
1223 }
1224
1225 stmt = DR_STMT (dr);
1226 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1227 gcc_assert (vectype);
1228
1229 /* The rightmost bits of an aligned address must be zeros.
1230 Construct the mask needed for this test. For example,
1231 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1232 mask must be 15 = 0xf. */
1233 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1234
1235 /* FORNOW: use the same mask to test all potentially unaligned
1236 references in the loop. The vectorizer currently supports
1237 a single vector size, see the reference to
1238 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1239 vectorization factor is computed. */
1240 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1241 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1242 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1243 VEC_safe_push (tree, heap,
1244 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1245 DR_STMT (dr));
1246 }
1247 }
1248
1249 /* Versioning requires at least one misaligned data reference. */
1250 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1251 do_versioning = false;
1252 else if (!do_versioning)
1253 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1254 }
1255
1256 if (do_versioning)
1257 {
1258 VEC(tree,heap) *may_misalign_stmts
1259 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1260 tree stmt;
1261
1262 /* It can now be assumed that the data references in the statements
1263 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1264 of the loop being vectorized. */
1265 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1266 {
1267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1268 dr = STMT_VINFO_DATA_REF (stmt_info);
1269 DR_MISALIGNMENT (dr) = 0;
1270 if (vect_print_dump_info (REPORT_ALIGNMENT))
1271 fprintf (vect_dump, "Alignment of access forced using versioning.");
1272 }
1273
1274 if (vect_print_dump_info (REPORT_DETAILS))
1275 fprintf (vect_dump, "Versioning for alignment will be applied.");
1276
1277 /* Peeling and versioning can't be done together at this time. */
1278 gcc_assert (! (do_peeling && do_versioning));
1279
1280 stat = vect_verify_datarefs_alignment (loop_vinfo);
1281 gcc_assert (stat);
1282 return stat;
1283 }
1284
1285 /* This point is reached if neither peeling nor versioning is being done. */
1286 gcc_assert (! (do_peeling || do_versioning));
1287
1288 stat = vect_verify_datarefs_alignment (loop_vinfo);
1289 return stat;
1290}
1291
1292
1293/* Function vect_analyze_data_refs_alignment
1294
1295 Analyze the alignment of the data-references in the loop.
1296 Return FALSE if a data reference is found that cannot be vectorized. */
1297
1298static bool
1299vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1300{
1301 if (vect_print_dump_info (REPORT_DETAILS))
1302 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1303
1304 if (!vect_compute_data_refs_alignment (loop_vinfo))
1305 {
1306 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1307 fprintf (vect_dump,
1308 "not vectorized: can't calculate alignment for data ref.");
1309 return false;
1310 }
1311
1312 return true;
1313}
1314
1315
1316/* Function vect_analyze_data_ref_access.
1317
1318 Analyze the access pattern of the data-reference DR. For now, a data access
1319 has to be consecutive to be considered vectorizable. */
1320
1321static bool
1322vect_analyze_data_ref_access (struct data_reference *dr)
1323{
1324 tree step = DR_STEP (dr);
1325 tree scalar_type = TREE_TYPE (DR_REF (dr));
1326
1327 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1328 {
1329 if (vect_print_dump_info (REPORT_DETAILS))
1330 fprintf (vect_dump, "not consecutive access");
1331 return false;
1332 }
1333 return true;
1334}
1335
1336
1337/* Function vect_analyze_data_ref_accesses.
1338
1339 Analyze the access pattern of all the data references in the loop.
1340
1341 FORNOW: the only access pattern that is considered vectorizable is a
1342 simple step 1 (consecutive) access.
1343
1344 FORNOW: handle only arrays and pointer accesses. */
1345
1346static bool
1347vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1348{
1349 unsigned int i;
1350 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1351 struct data_reference *dr;
1352
1353 if (vect_print_dump_info (REPORT_DETAILS))
1354 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1355
1356 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1357 if (!vect_analyze_data_ref_access (dr))
1358 {
1359 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1360 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1361 return false;
1362 }
1363
1364 return true;
1365}
1366
1367
1368/* Function vect_analyze_data_refs.
1369
1370 Find all the data references in the loop.
1371
1372 The general structure of the analysis of data refs in the vectorizer is as
1373 follows:
1374 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1375 find and analyze all data-refs in the loop and their dependences.
1376 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1377 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1378 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1379
1380*/
1381
1382static bool
1383vect_analyze_data_refs (loop_vec_info loop_vinfo)
1384{
1385 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1386 unsigned int i;
1387 VEC (data_reference_p, heap) *datarefs;
1388 struct data_reference *dr;
1389 tree scalar_type;
1390
1391 if (vect_print_dump_info (REPORT_DETAILS))
1392 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1393
1394 compute_data_dependences_for_loop (loop, false,
1395 &LOOP_VINFO_DATAREFS (loop_vinfo),
1396 &LOOP_VINFO_DDRS (loop_vinfo));
1397
1398 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1399 from stmt_vec_info struct to DR and vectype. */
1400 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1401
1402 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1403 {
1404 tree stmt;
1405 stmt_vec_info stmt_info;
1406
1407 if (!dr || !DR_REF (dr))
1408 {
1409 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1410 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1411 return false;
1412 }
1413
1414 /* Update DR field in stmt_vec_info struct. */
1415 stmt = DR_STMT (dr);
1416 stmt_info = vinfo_for_stmt (stmt);
1417
1418 if (STMT_VINFO_DATA_REF (stmt_info))
1419 {
1420 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1421 {
1422 fprintf (vect_dump,
1423 "not vectorized: more than one data ref in stmt: ");
1424 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1425 }
1426 return false;
1427 }
1428 STMT_VINFO_DATA_REF (stmt_info) = dr;
1429
1430 /* Check that analysis of the data-ref succeeded. */
1431 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1432 || !DR_STEP (dr))
1433 {
1434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1435 {
1436 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1437 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1438 }
1439 return false;
1440 }
1441 if (!DR_MEMTAG (dr))
1442 {
1443 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1444 {
1445 fprintf (vect_dump, "not vectorized: no memory tag for ");
1446 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1447 }
1448 return false;
1449 }
1450
1451 /* Set vectype for STMT. */
1452 scalar_type = TREE_TYPE (DR_REF (dr));
1453 STMT_VINFO_VECTYPE (stmt_info) =
1454 get_vectype_for_scalar_type (scalar_type);
1455 if (!STMT_VINFO_VECTYPE (stmt_info))
1456 {
1457 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1458 {
1459 fprintf (vect_dump,
1460 "not vectorized: no vectype for stmt: ");
1461 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1462 fprintf (vect_dump, " scalar_type: ");
1463 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1464 }
1465 return false;
1466 }
1467 }
1468
1469 return true;
1470}
1471
1472
1473/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1474
1475/* Function vect_mark_relevant.
1476
1477 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1478
1479static void
1480vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1481 bool relevant_p, bool live_p)
1482{
1483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1484 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1485 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1486
1487 if (vect_print_dump_info (REPORT_DETAILS))
1488 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1489
1490 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1491 {
1492 tree pattern_stmt;
1493
1494 /* This is the last stmt in a sequence that was detected as a
1495 pattern that can potentially be vectorized. Don't mark the stmt
1496 as relevant/live because it's not going to vectorized.
1497 Instead mark the pattern-stmt that replaces it. */
1498 if (vect_print_dump_info (REPORT_DETAILS))
1499 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1500 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1501 stmt_info = vinfo_for_stmt (pattern_stmt);
1502 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1503 save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1504 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1505 stmt = pattern_stmt;
1506 }
1507
1508 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1509 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1510
1511 if (TREE_CODE (stmt) == PHI_NODE)
1512 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1513 or live will fail vectorization later on. */
1514 return;
1515
1516 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1517 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1518 {
1519 if (vect_print_dump_info (REPORT_DETAILS))
1520 fprintf (vect_dump, "already marked relevant/live.");
1521 return;
1522 }
1523
1524 VEC_safe_push (tree, heap, *worklist, stmt);
1525}
1526
1527
1528/* Function vect_stmt_relevant_p.
1529
1530 Return true if STMT in loop that is represented by LOOP_VINFO is
1531 "relevant for vectorization".
1532
1533 A stmt is considered "relevant for vectorization" if:
1534 - it has uses outside the loop.
1535 - it has vdefs (it alters memory).
1536 - control stmts in the loop (except for the exit condition).
1537
1538 CHECKME: what other side effects would the vectorizer allow? */
1539
1540static bool
1541vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1542 bool *relevant_p, bool *live_p)
1543{
1544 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1545 ssa_op_iter op_iter;
1546 imm_use_iterator imm_iter;
1547 use_operand_p use_p;
1548 def_operand_p def_p;
1549
1550 *relevant_p = false;
1551 *live_p = false;
1552
1553 /* cond stmt other than loop exit cond. */
1554 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1555 *relevant_p = true;
1556
1557 /* changing memory. */
1558 if (TREE_CODE (stmt) != PHI_NODE)
1559 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1560 {
1561 if (vect_print_dump_info (REPORT_DETAILS))
1562 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1563 *relevant_p = true;
1564 }
1565
1566 /* uses outside the loop. */
1567 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1568 {
1569 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1570 {
1571 basic_block bb = bb_for_stmt (USE_STMT (use_p));
1572 if (!flow_bb_inside_loop_p (loop, bb))
1573 {
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1576
1577 /* We expect all such uses to be in the loop exit phis
1578 (because of loop closed form) */
1579 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1580 gcc_assert (bb == loop->single_exit->dest);
1581
1582 *live_p = true;
1583 }
1584 }
1585 }
1586
1587 return (*live_p || *relevant_p);
1588}
1589
1590
1591/* Function vect_mark_stmts_to_be_vectorized.
1592
1593 Not all stmts in the loop need to be vectorized. For example:
1594
1595 for i...
1596 for j...
1597 1. T0 = i + j
1598 2. T1 = a[T0]
1599
1600 3. j = j + 1
1601
1602 Stmt 1 and 3 do not need to be vectorized, because loop control and
1603 addressing of vectorized data-refs are handled differently.
1604
1605 This pass detects such stmts. */
1606
1607static bool
1608vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1609{
1610 VEC(tree,heap) *worklist;
1611 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1612 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1613 unsigned int nbbs = loop->num_nodes;
1614 block_stmt_iterator si;
1615 tree stmt, use;
1616 stmt_ann_t ann;
1617 ssa_op_iter iter;
1618 unsigned int i;
1619 stmt_vec_info stmt_vinfo;
1620 basic_block bb;
1621 tree phi;
1622 bool relevant_p, live_p;
1623 tree def, def_stmt;
1624 enum vect_def_type dt;
1625
1626 if (vect_print_dump_info (REPORT_DETAILS))
1627 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1628
1629 worklist = VEC_alloc (tree, heap, 64);
1630
1631 /* 1. Init worklist. */
1632
1633 bb = loop->header;
1634 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1635 {
1636 if (vect_print_dump_info (REPORT_DETAILS))
1637 {
1638 fprintf (vect_dump, "init: phi relevant? ");
1639 print_generic_expr (vect_dump, phi, TDF_SLIM);
1640 }
1641
1642 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1643 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1644 }
1645
1646 for (i = 0; i < nbbs; i++)
1647 {
1648 bb = bbs[i];
1649 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1650 {
1651 stmt = bsi_stmt (si);
1652
1653 if (vect_print_dump_info (REPORT_DETAILS))
1654 {
1655 fprintf (vect_dump, "init: stmt relevant? ");
1656 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1657 }
1658
1659 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1660 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1661 }
1662 }
1663
1664
1665 /* 2. Process_worklist */
1666
1667 while (VEC_length (tree, worklist) > 0)
1668 {
1669 stmt = VEC_pop (tree, worklist);
1670
1671 if (vect_print_dump_info (REPORT_DETAILS))
1672 {
1673 fprintf (vect_dump, "worklist: examine stmt: ");
1674 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1675 }
1676
1677 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1678 in the loop, mark the stmt that defines it (DEF_STMT) as
1679 relevant/irrelevant and live/dead according to the liveness and
1680 relevance properties of STMT.
1681 */
1682
1683 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1684
1685 ann = stmt_ann (stmt);
1686 stmt_vinfo = vinfo_for_stmt (stmt);
1687
1688 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1689 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1690
1691 /* Generally, the liveness and relevance properties of STMT are
1692 propagated to the DEF_STMTs of its USEs:
1693 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1694 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1695
1696 Exceptions:
1697
1698 (case 1)
1699 If USE is used only for address computations (e.g. array indexing),
1700 which does not need to be directly vectorized, then the
1701 liveness/relevance of the respective DEF_STMT is left unchanged.
1702
1703 (case 2)
1704 If STMT has been identified as defining a reduction variable, then
1705 we have two cases:
1706 (case 2.1)
1707 The last use of STMT is the reduction-variable, which is defined
1708 by a loop-header-phi. We don't want to mark the phi as live or
1709 relevant (because it does not need to be vectorized, it is handled
1710 as part of the vectorization of the reduction), so in this case we
1711 skip the call to vect_mark_relevant.
1712 (case 2.2)
1713 The rest of the uses of STMT are defined in the loop body. For
1714 the def_stmt of these uses we want to set liveness/relevance
1715 as follows:
1716 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1717 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1718 because even though STMT is classified as live (since it defines a
1719 value that is used across loop iterations) and irrelevant (since it
1720 is not used inside the loop), it will be vectorized, and therefore
1721 the corresponding DEF_STMTs need to marked as relevant.
1722 */
1723
1724 /* case 2.2: */
1725 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1726 {
1727 gcc_assert (!relevant_p && live_p);
1728 relevant_p = true;
1729 live_p = false;
1730 }
1731
1732 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1733 {
1734 /* case 1: we are only interested in uses that need to be vectorized.
1735 Uses that are used for address computation are not considered
1736 relevant.
1737 */
1738 if (!exist_non_indexing_operands_for_use_p (use, stmt))
1739 continue;
1740
1741 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1742 {
1743 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1744 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1745 VEC_free (tree, heap, worklist);
1746 return false;
1747 }
1748
1749 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1750 continue;
1751
1752 if (vect_print_dump_info (REPORT_DETAILS))
1753 {
1754 fprintf (vect_dump, "worklist: examine use %d: ", i);
1755 print_generic_expr (vect_dump, use, TDF_SLIM);
1756 }
1757
1758 bb = bb_for_stmt (def_stmt);
1759 if (!flow_bb_inside_loop_p (loop, bb))
1760 continue;
1761
1762 /* case 2.1: the reduction-use does not mark the defining-phi
1763 as relevant. */
1764 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1765 && TREE_CODE (def_stmt) == PHI_NODE)
1766 continue;
1767
1768 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1769 }
1770 } /* while worklist */
1771
1772 VEC_free (tree, heap, worklist);
1773 return true;
1774}
1775
1776
1777/* Function vect_can_advance_ivs_p
1778
1779 In case the number of iterations that LOOP iterates is unknown at compile
1780 time, an epilog loop will be generated, and the loop induction variables
1781 (IVs) will be "advanced" to the value they are supposed to take just before
1782 the epilog loop. Here we check that the access function of the loop IVs
1783 and the expression that represents the loop bound are simple enough.
1784 These restrictions will be relaxed in the future. */
1785
1786static bool
1787vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1788{
1789 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1790 basic_block bb = loop->header;
1791 tree phi;
1792
1793 /* Analyze phi functions of the loop header. */
1794
1795 if (vect_print_dump_info (REPORT_DETAILS))
1796 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1797
1798 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1799 {
1800 tree access_fn = NULL;
1801 tree evolution_part;
1802
1803 if (vect_print_dump_info (REPORT_DETAILS))
1804 {
1805 fprintf (vect_dump, "Analyze phi: ");
1806 print_generic_expr (vect_dump, phi, TDF_SLIM);
1807 }
1808
1809 /* Skip virtual phi's. The data dependences that are associated with
1810 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1811
1812 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1813 {
1814 if (vect_print_dump_info (REPORT_DETAILS))
1815 fprintf (vect_dump, "virtual phi. skip.");
1816 continue;
1817 }
1818
1819 /* Skip reduction phis. */
1820
1821 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1822 {
1823 if (vect_print_dump_info (REPORT_DETAILS))
1824 fprintf (vect_dump, "reduc phi. skip.");
1825 continue;
1826 }
1827
1828 /* Analyze the evolution function. */
1829
1830 access_fn = instantiate_parameters
1831 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1832
1833 if (!access_fn)
1834 {
1835 if (vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "No Access function.");
1837 return false;
1838 }
1839
1840 if (vect_print_dump_info (REPORT_DETAILS))
1841 {
1842 fprintf (vect_dump, "Access function of PHI: ");
1843 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1844 }
1845
1846 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1847
1848 if (evolution_part == NULL_TREE)
1849 {
1850 if (vect_print_dump_info (REPORT_DETAILS))
1851 fprintf (vect_dump, "No evolution.");
1852 return false;
1853 }
1854
1855 /* FORNOW: We do not transform initial conditions of IVs
1856 which evolution functions are a polynomial of degree >= 2. */
1857
1858 if (tree_is_chrec (evolution_part))
1859 return false;
1860 }
1861
1862 return true;
1863}
1864
1865
1866/* Function vect_get_loop_niters.
1867
1868 Determine how many iterations the loop is executed.
1869 If an expression that represents the number of iterations
1870 can be constructed, place it in NUMBER_OF_ITERATIONS.
1871 Return the loop exit condition. */
1872
1873static tree
1874vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1875{
1876 tree niters;
1877
1878 if (vect_print_dump_info (REPORT_DETAILS))
1879 fprintf (vect_dump, "=== get_loop_niters ===");
1880
1881 niters = number_of_iterations_in_loop (loop);
1882
1883 if (niters != NULL_TREE
1884 && niters != chrec_dont_know)
1885 {
1886 *number_of_iterations = niters;
1887
1888 if (vect_print_dump_info (REPORT_DETAILS))
1889 {
1890 fprintf (vect_dump, "==> get_loop_niters:" );
1891 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1892 }
1893 }
1894
1895 return get_loop_exit_condition (loop);
1896}
1897
1898
1899/* Function vect_analyze_loop_form.
1900
1901 Verify the following restrictions (some may be relaxed in the future):
1902 - it's an inner-most loop
1903 - number of BBs = 2 (which are the loop header and the latch)
1904 - the loop has a pre-header
1905 - the loop has a single entry and exit
1906 - the loop exit condition is simple enough, and the number of iterations
1907 can be analyzed (a countable loop). */
1908
1909static loop_vec_info
1910vect_analyze_loop_form (struct loop *loop)
1911{
1912 loop_vec_info loop_vinfo;
1913 tree loop_cond;
1914 tree number_of_iterations = NULL;
1915
1916 if (vect_print_dump_info (REPORT_DETAILS))
1917 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1918
1919 if (loop->inner)
1920 {
1921 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1922 fprintf (vect_dump, "not vectorized: nested loop.");
1923 return NULL;
1924 }
1925
1926 if (!loop->single_exit
1927 || loop->num_nodes != 2
1928 || EDGE_COUNT (loop->header->preds) != 2)
1929 {
1930 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1931 {
1932 if (!loop->single_exit)
1933 fprintf (vect_dump, "not vectorized: multiple exits.");
1934 else if (loop->num_nodes != 2)
1935 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1936 else if (EDGE_COUNT (loop->header->preds) != 2)
1937 fprintf (vect_dump, "not vectorized: too many incoming edges.");
1938 }
1939
1940 return NULL;
1941 }
1942
1943 /* We assume that the loop exit condition is at the end of the loop. i.e,
1944 that the loop is represented as a do-while (with a proper if-guard
1945 before the loop if needed), where the loop header contains all the
1946 executable statements, and the latch is empty. */
1947 if (!empty_block_p (loop->latch)
1948 || phi_nodes (loop->latch))
1949 {
1950 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1951 fprintf (vect_dump, "not vectorized: unexpected loop form.");
1952 return NULL;
1953 }
1954
1955 /* Make sure there exists a single-predecessor exit bb: */
1956 if (!single_pred_p (loop->single_exit->dest))
1957 {
1958 edge e = loop->single_exit;
1959 if (!(e->flags & EDGE_ABNORMAL))
1960 {
1961 split_loop_exit_edge (e);
1962 if (vect_print_dump_info (REPORT_DETAILS))
1963 fprintf (vect_dump, "split exit edge.");
1964 }
1965 else
1966 {
1967 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1968 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1969 return NULL;
1970 }
1971 }
1972
1973 if (empty_block_p (loop->header))
1974 {
1975 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1976 fprintf (vect_dump, "not vectorized: empty loop.");
1977 return NULL;
1978 }
1979
1980 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1981 if (!loop_cond)
1982 {
1983 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1984 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1985 return NULL;
1986 }
1987
1988 if (!number_of_iterations)
1989 {
1990 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1991 fprintf (vect_dump,
1992 "not vectorized: number of iterations cannot be computed.");
1993 return NULL;
1994 }
1995
1996 if (chrec_contains_undetermined (number_of_iterations))
1997 {
1998 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1999 fprintf (vect_dump, "Infinite number of iterations.");
2000 return false;
2001 }
2002
2003 loop_vinfo = new_loop_vec_info (loop);
2004 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2005
2006 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2007 {
2008 if (vect_print_dump_info (REPORT_DETAILS))
2009 {
2010 fprintf (vect_dump, "Symbolic number of iterations is ");
2011 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2012 }
2013 }
2014 else
2015 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2016 {
2017 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2018 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2019 return NULL;
2020 }
2021
2022 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2023
2024 return loop_vinfo;
2025}
2026
2027
2028/* Function vect_analyze_loop.
2029
2030 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2031 for it. The different analyses will record information in the
2032 loop_vec_info struct. */
2033loop_vec_info
2034vect_analyze_loop (struct loop *loop)
2035{
2036 bool ok;
2037 loop_vec_info loop_vinfo;
2038
2039 if (vect_print_dump_info (REPORT_DETAILS))
2040 fprintf (vect_dump, "===== analyze_loop_nest =====");
2041
2042 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2043
2044 loop_vinfo = vect_analyze_loop_form (loop);
2045 if (!loop_vinfo)
2046 {
2047 if (vect_print_dump_info (REPORT_DETAILS))
2048 fprintf (vect_dump, "bad loop form.");
2049 return NULL;
2050 }
2051
2052 /* Find all data references in the loop (which correspond to vdefs/vuses)
2053 and analyze their evolution in the loop.
2054
2055 FORNOW: Handle only simple, array references, which
2056 alignment can be forced, and aligned pointer-references. */
2057
2058 ok = vect_analyze_data_refs (loop_vinfo);
2059 if (!ok)
2060 {
2061 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "bad data references.");
2063 destroy_loop_vec_info (loop_vinfo);
2064 return NULL;
2065 }
2066
2067 /* Classify all cross-iteration scalar data-flow cycles.
2068 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2069
2070 vect_analyze_scalar_cycles (loop_vinfo);
2071
2072 vect_pattern_recog (loop_vinfo);
2073
2074 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2075
2076 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2077 if (!ok)
2078 {
2079 if (vect_print_dump_info (REPORT_DETAILS))
2080 fprintf (vect_dump, "unexpected pattern.");
2081 destroy_loop_vec_info (loop_vinfo);
2082 return NULL;
2083 }
2084
2085 /* Analyze the alignment of the data-refs in the loop.
2086 Fail if a data reference is found that cannot be vectorized. */
2087
2088 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2089 if (!ok)
2090 {
2091 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "bad data alignment.");
2093 destroy_loop_vec_info (loop_vinfo);
2094 return NULL;
2095 }
2096
2097 ok = vect_determine_vectorization_factor (loop_vinfo);
2098 if (!ok)
2099 {
2100 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "can't determine vectorization factor.");
2102 destroy_loop_vec_info (loop_vinfo);
2103 return NULL;
2104 }
2105
2106 /* Analyze data dependences between the data-refs in the loop.
2107 FORNOW: fail at the first data dependence that we encounter. */
2108
2109 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2110 if (!ok)
2111 {
2112 if (vect_print_dump_info (REPORT_DETAILS))
2113 fprintf (vect_dump, "bad data dependence.");
2114 destroy_loop_vec_info (loop_vinfo);
2115 return NULL;
2116 }
2117
2118 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2119 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2120
2121 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2122 if (!ok)
2123 {
2124 if (vect_print_dump_info (REPORT_DETAILS))
2125 fprintf (vect_dump, "bad data access.");
2126 destroy_loop_vec_info (loop_vinfo);
2127 return NULL;
2128 }
2129
2130 /* This pass will decide on using loop versioning and/or loop peeling in
2131 order to enhance the alignment of data references in the loop. */
2132
2133 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2134 if (!ok)
2135 {
2136 if (vect_print_dump_info (REPORT_DETAILS))
2137 fprintf (vect_dump, "bad data alignment.");
2138 destroy_loop_vec_info (loop_vinfo);
2139 return NULL;
2140 }
2141
2142 /* Scan all the operations in the loop and make sure they are
2143 vectorizable. */
2144
2145 ok = vect_analyze_operations (loop_vinfo);
2146 if (!ok)
2147 {
2148 if (vect_print_dump_info (REPORT_DETAILS))
2149 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2150 destroy_loop_vec_info (loop_vinfo);
2151 return NULL;
2152 }
2153
2154 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2155
2156 return loop_vinfo;
2157}