/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
*/ 21169689Skan 22169689Skan#include "config.h" 23169689Skan#include "system.h" 24169689Skan#include "coretypes.h" 25169689Skan#include "tm.h" 26169689Skan#include "ggc.h" 27169689Skan#include "tree.h" 28169689Skan#include "target.h" 29169689Skan#include "rtl.h" 30169689Skan#include "basic-block.h" 31169689Skan#include "diagnostic.h" 32169689Skan#include "tree-flow.h" 33169689Skan#include "tree-dump.h" 34169689Skan#include "timevar.h" 35169689Skan#include "cfgloop.h" 36169689Skan#include "expr.h" 37169689Skan#include "optabs.h" 38169689Skan#include "recog.h" 39169689Skan#include "tree-data-ref.h" 40169689Skan#include "tree-chrec.h" 41169689Skan#include "tree-scalar-evolution.h" 42169689Skan#include "tree-vectorizer.h" 43169689Skan#include "langhooks.h" 44169689Skan#include "tree-pass.h" 45169689Skan#include "toplev.h" 46169689Skan#include "real.h" 47169689Skan 48169689Skan/* Utility functions for the code transformation. */ 49169689Skanstatic bool vect_transform_stmt (tree, block_stmt_iterator *); 50169689Skanstatic void vect_align_data_ref (tree); 51169689Skanstatic tree vect_create_destination_var (tree, tree); 52169689Skanstatic tree vect_create_data_ref_ptr 53169689Skan (tree, block_stmt_iterator *, tree, tree *, bool); 54169689Skanstatic tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); 55169689Skanstatic tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); 56169689Skanstatic tree vect_get_vec_def_for_operand (tree, tree, tree *); 57169689Skanstatic tree vect_init_vector (tree, tree); 58169689Skanstatic void vect_finish_stmt_generation 59169689Skan (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); 60169689Skanstatic bool vect_is_simple_cond (tree, loop_vec_info); 61169689Skanstatic void update_vuses_to_preheader (tree, struct loop*); 62169689Skanstatic void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree); 63169689Skanstatic tree get_initial_def_for_reduction (tree, tree, tree *); 64169689Skan 65169689Skan/* 
Utility function dealing with loop peeling (not peeling itself). */ 66169689Skanstatic void vect_generate_tmps_on_preheader 67169689Skan (loop_vec_info, tree *, tree *, tree *); 68169689Skanstatic tree vect_build_loop_niters (loop_vec_info); 69169689Skanstatic void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); 70169689Skanstatic tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); 71169689Skanstatic void vect_update_init_of_dr (struct data_reference *, tree niters); 72169689Skanstatic void vect_update_inits_of_drs (loop_vec_info, tree); 73169689Skanstatic void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); 74169689Skanstatic void vect_do_peeling_for_loop_bound 75169689Skan (loop_vec_info, tree *, struct loops *); 76169689Skanstatic int vect_min_worthwhile_factor (enum tree_code); 77169689Skan 78169689Skan 79169689Skan/* Function vect_get_new_vect_var. 80169689Skan 81169689Skan Returns a name for a new variable. The current naming scheme appends the 82169689Skan prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to 83169689Skan the name of vectorizer generated variables, and appends that to NAME if 84169689Skan provided. 
*/ 85169689Skan 86169689Skanstatic tree 87169689Skanvect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) 88169689Skan{ 89169689Skan const char *prefix; 90169689Skan tree new_vect_var; 91169689Skan 92169689Skan switch (var_kind) 93169689Skan { 94169689Skan case vect_simple_var: 95169689Skan prefix = "vect_"; 96169689Skan break; 97169689Skan case vect_scalar_var: 98169689Skan prefix = "stmp_"; 99169689Skan break; 100169689Skan case vect_pointer_var: 101169689Skan prefix = "vect_p"; 102169689Skan break; 103169689Skan default: 104169689Skan gcc_unreachable (); 105169689Skan } 106169689Skan 107169689Skan if (name) 108169689Skan new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); 109169689Skan else 110169689Skan new_vect_var = create_tmp_var (type, prefix); 111169689Skan 112169689Skan return new_vect_var; 113169689Skan} 114169689Skan 115169689Skan 116169689Skan/* Function vect_create_addr_base_for_vector_ref. 117169689Skan 118169689Skan Create an expression that computes the address of the first memory location 119169689Skan that will be accessed for a data reference. 120169689Skan 121169689Skan Input: 122169689Skan STMT: The statement containing the data reference. 123169689Skan NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. 124169689Skan OFFSET: Optional. If supplied, it is be added to the initial address. 125169689Skan 126169689Skan Output: 127169689Skan 1. Return an SSA_NAME whose value is the address of the memory location of 128169689Skan the first vector of the data reference. 129169689Skan 2. If new_stmt_list is not NULL_TREE after return then the caller must insert 130169689Skan these statement(s) which define the returned SSA_NAME. 131169689Skan 132169689Skan FORNOW: We are only handling array accesses with step 1. 
*/ 133169689Skan 134169689Skanstatic tree 135169689Skanvect_create_addr_base_for_vector_ref (tree stmt, 136169689Skan tree *new_stmt_list, 137169689Skan tree offset) 138169689Skan{ 139169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 140169689Skan struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 141169689Skan tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr)); 142169689Skan tree base_name = build_fold_indirect_ref (data_ref_base); 143169689Skan tree ref = DR_REF (dr); 144169689Skan tree scalar_type = TREE_TYPE (ref); 145169689Skan tree scalar_ptr_type = build_pointer_type (scalar_type); 146169689Skan tree vec_stmt; 147169689Skan tree new_temp; 148169689Skan tree addr_base, addr_expr; 149169689Skan tree dest, new_stmt; 150169689Skan tree base_offset = unshare_expr (DR_OFFSET (dr)); 151169689Skan tree init = unshare_expr (DR_INIT (dr)); 152169689Skan 153169689Skan /* Create base_offset */ 154169689Skan base_offset = size_binop (PLUS_EXPR, base_offset, init); 155169689Skan dest = create_tmp_var (TREE_TYPE (base_offset), "base_off"); 156169689Skan add_referenced_var (dest); 157169689Skan base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest); 158169689Skan append_to_statement_list_force (new_stmt, new_stmt_list); 159169689Skan 160169689Skan if (offset) 161169689Skan { 162169689Skan tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset"); 163169689Skan add_referenced_var (tmp); 164169689Skan offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, 165169689Skan DR_STEP (dr)); 166169689Skan base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset), 167169689Skan base_offset, offset); 168169689Skan base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp); 169169689Skan append_to_statement_list_force (new_stmt, new_stmt_list); 170169689Skan } 171169689Skan 172169689Skan /* base + base_offset */ 173169689Skan addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base, 174169689Skan 
base_offset); 175169689Skan 176169689Skan /* addr_expr = addr_base */ 177169689Skan addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var, 178169689Skan get_name (base_name)); 179169689Skan add_referenced_var (addr_expr); 180169689Skan vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base); 181169689Skan new_temp = make_ssa_name (addr_expr, vec_stmt); 182169689Skan TREE_OPERAND (vec_stmt, 0) = new_temp; 183169689Skan append_to_statement_list_force (vec_stmt, new_stmt_list); 184169689Skan 185169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 186169689Skan { 187169689Skan fprintf (vect_dump, "created "); 188169689Skan print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 189169689Skan } 190169689Skan return new_temp; 191169689Skan} 192169689Skan 193169689Skan 194169689Skan/* Function vect_align_data_ref. 195169689Skan 196169689Skan Handle misalignment of a memory accesses. 197169689Skan 198169689Skan FORNOW: Can't handle misaligned accesses. 199169689Skan Make sure that the dataref is aligned. */ 200169689Skan 201169689Skanstatic void 202169689Skanvect_align_data_ref (tree stmt) 203169689Skan{ 204169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 205169689Skan struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 206169689Skan 207169689Skan /* FORNOW: can't handle misaligned accesses; 208169689Skan all accesses expected to be aligned. */ 209169689Skan gcc_assert (aligned_access_p (dr)); 210169689Skan} 211169689Skan 212169689Skan 213169689Skan/* Function vect_create_data_ref_ptr. 214169689Skan 215169689Skan Create a memory reference expression for vector access, to be used in a 216169689Skan vector load/store stmt. The reference is based on a new pointer to vector 217169689Skan type (vp). 218169689Skan 219169689Skan Input: 220169689Skan 1. STMT: a stmt that references memory. Expected to be of the form 221169689Skan MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. 222169689Skan 2. 
BSI: block_stmt_iterator where new stmts can be added. 223169689Skan 3. OFFSET (optional): an offset to be added to the initial address accessed 224169689Skan by the data-ref in STMT. 225169689Skan 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain 226169689Skan pointing to the initial address. 227169689Skan 228169689Skan Output: 229169689Skan 1. Declare a new ptr to vector_type, and have it point to the base of the 230169689Skan data reference (initial addressed accessed by the data reference). 231169689Skan For example, for vector of type V8HI, the following code is generated: 232169689Skan 233169689Skan v8hi *vp; 234169689Skan vp = (v8hi *)initial_address; 235169689Skan 236169689Skan if OFFSET is not supplied: 237169689Skan initial_address = &a[init]; 238169689Skan if OFFSET is supplied: 239169689Skan initial_address = &a[init + OFFSET]; 240169689Skan 241169689Skan Return the initial_address in INITIAL_ADDRESS. 242169689Skan 243169689Skan 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create 244169689Skan a data-reference in the loop based on the new vector pointer vp. This 245169689Skan new data reference will by some means be updated each iteration of 246169689Skan the loop. Return the pointer vp'. 247169689Skan 248169689Skan FORNOW: handle only aligned and consecutive accesses. 
*/ 249169689Skan 250169689Skanstatic tree 251169689Skanvect_create_data_ref_ptr (tree stmt, 252169689Skan block_stmt_iterator *bsi ATTRIBUTE_UNUSED, 253169689Skan tree offset, tree *initial_address, bool only_init) 254169689Skan{ 255169689Skan tree base_name; 256169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 257169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 258169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 259169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 260169689Skan tree vect_ptr_type; 261169689Skan tree vect_ptr; 262169689Skan tree tag; 263169689Skan tree new_temp; 264169689Skan tree vec_stmt; 265169689Skan tree new_stmt_list = NULL_TREE; 266169689Skan edge pe = loop_preheader_edge (loop); 267169689Skan basic_block new_bb; 268169689Skan tree vect_ptr_init; 269169689Skan struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 270169689Skan 271169689Skan base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr))); 272169689Skan 273169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 274169689Skan { 275169689Skan tree data_ref_base = base_name; 276169689Skan fprintf (vect_dump, "create vector-pointer variable to type: "); 277169689Skan print_generic_expr (vect_dump, vectype, TDF_SLIM); 278169689Skan if (TREE_CODE (data_ref_base) == VAR_DECL) 279169689Skan fprintf (vect_dump, " vectorizing a one dimensional array ref: "); 280169689Skan else if (TREE_CODE (data_ref_base) == ARRAY_REF) 281169689Skan fprintf (vect_dump, " vectorizing a multidimensional array ref: "); 282169689Skan else if (TREE_CODE (data_ref_base) == COMPONENT_REF) 283169689Skan fprintf (vect_dump, " vectorizing a record based array ref: "); 284169689Skan else if (TREE_CODE (data_ref_base) == SSA_NAME) 285169689Skan fprintf (vect_dump, " vectorizing a pointer ref: "); 286169689Skan print_generic_expr (vect_dump, base_name, TDF_SLIM); 287169689Skan } 288169689Skan 289169689Skan /** (1) Create the new vector-pointer 
variable: **/ 290169689Skan 291169689Skan vect_ptr_type = build_pointer_type (vectype); 292169689Skan vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 293169689Skan get_name (base_name)); 294169689Skan add_referenced_var (vect_ptr); 295169689Skan 296169689Skan 297169689Skan /** (2) Add aliasing information to the new vector-pointer: 298169689Skan (The points-to info (DR_PTR_INFO) may be defined later.) **/ 299169689Skan 300169689Skan tag = DR_MEMTAG (dr); 301169689Skan gcc_assert (tag); 302169689Skan 303169689Skan /* If tag is a variable (and NOT_A_TAG) than a new symbol memory 304169689Skan tag must be created with tag added to its may alias list. */ 305169689Skan if (!MTAG_P (tag)) 306169689Skan new_type_alias (vect_ptr, tag, DR_REF (dr)); 307169689Skan else 308169689Skan var_ann (vect_ptr)->symbol_mem_tag = tag; 309169689Skan 310169689Skan var_ann (vect_ptr)->subvars = DR_SUBVARS (dr); 311169689Skan 312169689Skan /** (3) Calculate the initial address the vector-pointer, and set 313169689Skan the vector-pointer to point to it before the loop: **/ 314169689Skan 315169689Skan /* Create: (&(base[init_val+offset]) in the loop preheader. 
*/ 316169689Skan new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, 317169689Skan offset); 318169689Skan pe = loop_preheader_edge (loop); 319169689Skan new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); 320169689Skan gcc_assert (!new_bb); 321169689Skan *initial_address = new_temp; 322169689Skan 323169689Skan /* Create: p = (vectype *) initial_base */ 324169689Skan vec_stmt = fold_convert (vect_ptr_type, new_temp); 325169689Skan vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); 326169689Skan vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); 327169689Skan TREE_OPERAND (vec_stmt, 0) = vect_ptr_init; 328169689Skan new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); 329169689Skan gcc_assert (!new_bb); 330169689Skan 331169689Skan 332169689Skan /** (4) Handle the updating of the vector-pointer inside the loop: **/ 333169689Skan 334169689Skan if (only_init) /* No update in loop is required. */ 335169689Skan { 336169689Skan /* Copy the points-to information if it exists. */ 337169689Skan if (DR_PTR_INFO (dr)) 338169689Skan duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); 339169689Skan return vect_ptr_init; 340169689Skan } 341169689Skan else 342169689Skan { 343169689Skan block_stmt_iterator incr_bsi; 344169689Skan bool insert_after; 345169689Skan tree indx_before_incr, indx_after_incr; 346169689Skan tree incr; 347169689Skan 348169689Skan standard_iv_increment_position (loop, &incr_bsi, &insert_after); 349169689Skan create_iv (vect_ptr_init, 350169689Skan fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)), 351169689Skan NULL_TREE, loop, &incr_bsi, insert_after, 352169689Skan &indx_before_incr, &indx_after_incr); 353169689Skan incr = bsi_stmt (incr_bsi); 354169689Skan set_stmt_info (stmt_ann (incr), 355169689Skan new_stmt_vec_info (incr, loop_vinfo)); 356169689Skan 357169689Skan /* Copy the points-to information if it exists. 
*/ 358169689Skan if (DR_PTR_INFO (dr)) 359169689Skan { 360169689Skan duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 361169689Skan duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 362169689Skan } 363169689Skan merge_alias_info (vect_ptr_init, indx_before_incr); 364169689Skan merge_alias_info (vect_ptr_init, indx_after_incr); 365169689Skan 366169689Skan return indx_before_incr; 367169689Skan } 368169689Skan} 369169689Skan 370169689Skan 371169689Skan/* Function vect_create_destination_var. 372169689Skan 373169689Skan Create a new temporary of type VECTYPE. */ 374169689Skan 375169689Skanstatic tree 376169689Skanvect_create_destination_var (tree scalar_dest, tree vectype) 377169689Skan{ 378169689Skan tree vec_dest; 379169689Skan const char *new_name; 380169689Skan tree type; 381169689Skan enum vect_var_kind kind; 382169689Skan 383169689Skan kind = vectype ? vect_simple_var : vect_scalar_var; 384169689Skan type = vectype ? vectype : TREE_TYPE (scalar_dest); 385169689Skan 386169689Skan gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); 387169689Skan 388169689Skan new_name = get_name (scalar_dest); 389169689Skan if (!new_name) 390169689Skan new_name = "var_"; 391169689Skan vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name); 392169689Skan add_referenced_var (vec_dest); 393169689Skan 394169689Skan return vec_dest; 395169689Skan} 396169689Skan 397169689Skan 398169689Skan/* Function vect_init_vector. 399169689Skan 400169689Skan Insert a new stmt (INIT_STMT) that initializes a new vector variable with 401169689Skan the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be 402169689Skan used in the vectorization of STMT. 
*/ 403169689Skan 404169689Skanstatic tree 405169689Skanvect_init_vector (tree stmt, tree vector_var) 406169689Skan{ 407169689Skan stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 408169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 409169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 410169689Skan tree new_var; 411169689Skan tree init_stmt; 412169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 413169689Skan tree vec_oprnd; 414169689Skan edge pe; 415169689Skan tree new_temp; 416169689Skan basic_block new_bb; 417169689Skan 418169689Skan new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); 419169689Skan add_referenced_var (new_var); 420169689Skan 421169689Skan init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); 422169689Skan new_temp = make_ssa_name (new_var, init_stmt); 423169689Skan TREE_OPERAND (init_stmt, 0) = new_temp; 424169689Skan 425169689Skan pe = loop_preheader_edge (loop); 426169689Skan new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); 427169689Skan gcc_assert (!new_bb); 428169689Skan 429169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 430169689Skan { 431169689Skan fprintf (vect_dump, "created new init_stmt: "); 432169689Skan print_generic_expr (vect_dump, init_stmt, TDF_SLIM); 433169689Skan } 434169689Skan 435169689Skan vec_oprnd = TREE_OPERAND (init_stmt, 0); 436169689Skan return vec_oprnd; 437169689Skan} 438169689Skan 439169689Skan 440169689Skan/* Function vect_get_vec_def_for_operand. 441169689Skan 442169689Skan OP is an operand in STMT. This function returns a (vector) def that will be 443169689Skan used in the vectorized stmt for STMT. 444169689Skan 445169689Skan In the case that OP is an SSA_NAME which is defined in the loop, then 446169689Skan STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. 447169689Skan 448169689Skan In case OP is an invariant or constant, a new stmt that creates a vector def 449169689Skan needs to be introduced. 
*/ 450169689Skan 451169689Skanstatic tree 452169689Skanvect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) 453169689Skan{ 454169689Skan tree vec_oprnd; 455169689Skan tree vec_stmt; 456169689Skan tree def_stmt; 457169689Skan stmt_vec_info def_stmt_info = NULL; 458169689Skan stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 459169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 460169689Skan int nunits = TYPE_VECTOR_SUBPARTS (vectype); 461169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 462169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 463169689Skan tree vec_inv; 464169689Skan tree vec_cst; 465169689Skan tree t = NULL_TREE; 466169689Skan tree def; 467169689Skan int i; 468169689Skan enum vect_def_type dt; 469169689Skan bool is_simple_use; 470169689Skan 471169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 472169689Skan { 473169689Skan fprintf (vect_dump, "vect_get_vec_def_for_operand: "); 474169689Skan print_generic_expr (vect_dump, op, TDF_SLIM); 475169689Skan } 476169689Skan 477169689Skan is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 478169689Skan gcc_assert (is_simple_use); 479169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 480169689Skan { 481169689Skan if (def) 482169689Skan { 483169689Skan fprintf (vect_dump, "def = "); 484169689Skan print_generic_expr (vect_dump, def, TDF_SLIM); 485169689Skan } 486169689Skan if (def_stmt) 487169689Skan { 488169689Skan fprintf (vect_dump, " def_stmt = "); 489169689Skan print_generic_expr (vect_dump, def_stmt, TDF_SLIM); 490169689Skan } 491169689Skan } 492169689Skan 493169689Skan switch (dt) 494169689Skan { 495169689Skan /* Case 1: operand is a constant. 
*/ 496169689Skan case vect_constant_def: 497169689Skan { 498169689Skan if (scalar_def) 499169689Skan *scalar_def = op; 500169689Skan 501169689Skan /* Create 'vect_cst_ = {cst,cst,...,cst}' */ 502169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 503169689Skan fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); 504169689Skan 505169689Skan for (i = nunits - 1; i >= 0; --i) 506169689Skan { 507169689Skan t = tree_cons (NULL_TREE, op, t); 508169689Skan } 509169689Skan vec_cst = build_vector (vectype, t); 510169689Skan return vect_init_vector (stmt, vec_cst); 511169689Skan } 512169689Skan 513169689Skan /* Case 2: operand is defined outside the loop - loop invariant. */ 514169689Skan case vect_invariant_def: 515169689Skan { 516169689Skan if (scalar_def) 517169689Skan *scalar_def = def; 518169689Skan 519169689Skan /* Create 'vec_inv = {inv,inv,..,inv}' */ 520169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 521169689Skan fprintf (vect_dump, "Create vector_inv."); 522169689Skan 523169689Skan for (i = nunits - 1; i >= 0; --i) 524169689Skan { 525169689Skan t = tree_cons (NULL_TREE, def, t); 526169689Skan } 527169689Skan 528169689Skan /* FIXME: use build_constructor directly. */ 529169689Skan vec_inv = build_constructor_from_list (vectype, t); 530169689Skan return vect_init_vector (stmt, vec_inv); 531169689Skan } 532169689Skan 533169689Skan /* Case 3: operand is defined inside the loop. */ 534169689Skan case vect_loop_def: 535169689Skan { 536169689Skan if (scalar_def) 537169689Skan *scalar_def = def_stmt; 538169689Skan 539169689Skan /* Get the def from the vectorized stmt. 
*/ 540169689Skan def_stmt_info = vinfo_for_stmt (def_stmt); 541169689Skan vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 542169689Skan gcc_assert (vec_stmt); 543169689Skan vec_oprnd = TREE_OPERAND (vec_stmt, 0); 544169689Skan return vec_oprnd; 545169689Skan } 546169689Skan 547169689Skan /* Case 4: operand is defined by a loop header phi - reduction */ 548169689Skan case vect_reduction_def: 549169689Skan { 550169689Skan gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); 551169689Skan 552169689Skan /* Get the def before the loop */ 553169689Skan op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 554169689Skan return get_initial_def_for_reduction (stmt, op, scalar_def); 555169689Skan } 556169689Skan 557169689Skan /* Case 5: operand is defined by loop-header phi - induction. */ 558169689Skan case vect_induction_def: 559169689Skan { 560169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 561169689Skan fprintf (vect_dump, "induction - unsupported."); 562169689Skan internal_error ("no support for induction"); /* FORNOW */ 563169689Skan } 564169689Skan 565169689Skan default: 566169689Skan gcc_unreachable (); 567169689Skan } 568169689Skan} 569169689Skan 570169689Skan 571169689Skan/* Function vect_finish_stmt_generation. 572169689Skan 573169689Skan Insert a new stmt. */ 574169689Skan 575169689Skanstatic void 576169689Skanvect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) 577169689Skan{ 578169689Skan bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); 579169689Skan 580169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 581169689Skan { 582169689Skan fprintf (vect_dump, "add new stmt: "); 583169689Skan print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 584169689Skan } 585169689Skan 586169689Skan /* Make sure bsi points to the stmt that is being vectorized. 
*/ 587169689Skan gcc_assert (stmt == bsi_stmt (*bsi)); 588169689Skan 589169689Skan#ifdef USE_MAPPED_LOCATION 590169689Skan SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); 591169689Skan#else 592169689Skan SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt)); 593169689Skan#endif 594169689Skan} 595169689Skan 596169689Skan 597169689Skan#define ADJUST_IN_EPILOG 1 598169689Skan 599169689Skan/* Function get_initial_def_for_reduction 600169689Skan 601169689Skan Input: 602169689Skan STMT - a stmt that performs a reduction operation in the loop. 603169689Skan INIT_VAL - the initial value of the reduction variable 604169689Skan 605169689Skan Output: 606169689Skan SCALAR_DEF - a tree that holds a value to be added to the final result 607169689Skan of the reduction (used for "ADJUST_IN_EPILOG" - see below). 608169689Skan Return a vector variable, initialized according to the operation that STMT 609169689Skan performs. This vector will be used as the initial value of the 610169689Skan vector of partial results. 611169689Skan 612169689Skan Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows: 613169689Skan add: [0,0,...,0,0] 614169689Skan mult: [1,1,...,1,1] 615169689Skan min/max: [init_val,init_val,..,init_val,init_val] 616169689Skan bit and/or: [init_val,init_val,..,init_val,init_val] 617169689Skan and when necessary (e.g. add/mult case) let the caller know 618169689Skan that it needs to adjust the result by init_val. 619169689Skan 620169689Skan Option2: Initialize the vector as follows: 621169689Skan add: [0,0,...,0,init_val] 622169689Skan mult: [1,1,...,1,init_val] 623169689Skan min/max: [init_val,init_val,...,init_val] 624169689Skan bit and/or: [init_val,init_val,...,init_val] 625169689Skan and no adjustments are needed. 
626169689Skan 627169689Skan For example, for the following code: 628169689Skan 629169689Skan s = init_val; 630169689Skan for (i=0;i<n;i++) 631169689Skan s = s + a[i]; 632169689Skan 633169689Skan STMT is 's = s + a[i]', and the reduction variable is 's'. 634169689Skan For a vector of 4 units, we want to return either [0,0,0,init_val], 635169689Skan or [0,0,0,0] and let the caller know that it needs to adjust 636169689Skan the result at the end by 'init_val'. 637169689Skan 638169689Skan FORNOW: We use the "ADJUST_IN_EPILOG" scheme. 639169689Skan TODO: Use some cost-model to estimate which scheme is more profitable. 640169689Skan*/ 641169689Skan 642169689Skanstatic tree 643169689Skanget_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def) 644169689Skan{ 645169689Skan stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 646169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 647169689Skan int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); 648169689Skan int nelements; 649169689Skan enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); 650169689Skan tree type = TREE_TYPE (init_val); 651169689Skan tree def; 652169689Skan tree vec, t = NULL_TREE; 653169689Skan bool need_epilog_adjust; 654169689Skan int i; 655169689Skan 656169689Skan gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); 657169689Skan 658169689Skan switch (code) 659169689Skan { 660169689Skan case WIDEN_SUM_EXPR: 661169689Skan case DOT_PROD_EXPR: 662169689Skan case PLUS_EXPR: 663169689Skan if (INTEGRAL_TYPE_P (type)) 664169689Skan def = build_int_cst (type, 0); 665169689Skan else 666169689Skan def = build_real (type, dconst0); 667169689Skan 668169689Skan#ifdef ADJUST_IN_EPILOG 669169689Skan /* All the 'nunits' elements are set to 0. The final result will be 670169689Skan adjusted by 'init_val' at the loop epilog. 
*/ 671169689Skan nelements = nunits; 672169689Skan need_epilog_adjust = true; 673169689Skan#else 674169689Skan /* 'nunits - 1' elements are set to 0; The last element is set to 675169689Skan 'init_val'. No further adjustments at the epilog are needed. */ 676169689Skan nelements = nunits - 1; 677169689Skan need_epilog_adjust = false; 678169689Skan#endif 679169689Skan break; 680169689Skan 681169689Skan case MIN_EXPR: 682169689Skan case MAX_EXPR: 683169689Skan def = init_val; 684169689Skan nelements = nunits; 685169689Skan need_epilog_adjust = false; 686169689Skan break; 687169689Skan 688169689Skan default: 689169689Skan gcc_unreachable (); 690169689Skan } 691169689Skan 692169689Skan for (i = nelements - 1; i >= 0; --i) 693169689Skan t = tree_cons (NULL_TREE, def, t); 694169689Skan 695169689Skan if (nelements == nunits - 1) 696169689Skan { 697169689Skan /* Set the last element of the vector. */ 698169689Skan t = tree_cons (NULL_TREE, init_val, t); 699169689Skan nelements += 1; 700169689Skan } 701169689Skan gcc_assert (nelements == nunits); 702169689Skan 703169689Skan if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST) 704169689Skan vec = build_vector (vectype, t); 705169689Skan else 706169689Skan vec = build_constructor_from_list (vectype, t); 707169689Skan 708169689Skan if (!need_epilog_adjust) 709169689Skan *scalar_def = NULL_TREE; 710169689Skan else 711169689Skan *scalar_def = init_val; 712169689Skan 713169689Skan return vect_init_vector (stmt, vec); 714169689Skan} 715169689Skan 716169689Skan 717169689Skan/* Function vect_create_epilog_for_reduction 718169689Skan 719169689Skan Create code at the loop-epilog to finalize the result of a reduction 720169689Skan computation. 721169689Skan 722169689Skan VECT_DEF is a vector of partial results. 723169689Skan REDUC_CODE is the tree-code for the epilog reduction. 724169689Skan STMT is the scalar reduction stmt that is being vectorized. 
725169689Skan REDUCTION_PHI is the phi-node that carries the reduction computation. 726169689Skan 727169689Skan This function: 728169689Skan 1. Creates the reduction def-use cycle: sets the the arguments for 729169689Skan REDUCTION_PHI: 730169689Skan The loop-entry argument is the vectorized initial-value of the reduction. 731169689Skan The loop-latch argument is VECT_DEF - the vector of partial sums. 732169689Skan 2. "Reduces" the vector of partial results VECT_DEF into a single result, 733169689Skan by applying the operation specified by REDUC_CODE if available, or by 734169689Skan other means (whole-vector shifts or a scalar loop). 735169689Skan The function also creates a new phi node at the loop exit to preserve 736169689Skan loop-closed form, as illustrated below. 737169689Skan 738169689Skan The flow at the entry to this function: 739169689Skan 740169689Skan loop: 741169689Skan vec_def = phi <null, null> # REDUCTION_PHI 742169689Skan VECT_DEF = vector_stmt # vectorized form of STMT 743169689Skan s_loop = scalar_stmt # (scalar) STMT 744169689Skan loop_exit: 745169689Skan s_out0 = phi <s_loop> # (scalar) EXIT_PHI 746169689Skan use <s_out0> 747169689Skan use <s_out0> 748169689Skan 749169689Skan The above is transformed by this function into: 750169689Skan 751169689Skan loop: 752169689Skan vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI 753169689Skan VECT_DEF = vector_stmt # vectorized form of STMT 754169689Skan s_loop = scalar_stmt # (scalar) STMT 755169689Skan loop_exit: 756169689Skan s_out0 = phi <s_loop> # (scalar) EXIT_PHI 757169689Skan v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI 758169689Skan v_out2 = reduce <v_out1> 759169689Skan s_out3 = extract_field <v_out2, 0> 760169689Skan s_out4 = adjust_result <s_out3> 761169689Skan use <s_out4> 762169689Skan use <s_out4> 763169689Skan*/ 764169689Skan 765169689Skanstatic void 766169689Skanvect_create_epilog_for_reduction (tree vect_def, tree stmt, 767169689Skan enum tree_code reduc_code, tree reduction_phi) 
768169689Skan{ 769169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 770169689Skan tree vectype; 771169689Skan enum machine_mode mode; 772169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 773169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 774169689Skan basic_block exit_bb; 775169689Skan tree scalar_dest; 776169689Skan tree scalar_type; 777169689Skan tree new_phi; 778169689Skan block_stmt_iterator exit_bsi; 779169689Skan tree vec_dest; 780169689Skan tree new_temp; 781169689Skan tree new_name; 782169689Skan tree epilog_stmt; 783169689Skan tree new_scalar_dest, exit_phi; 784169689Skan tree bitsize, bitpos, bytesize; 785169689Skan enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); 786169689Skan tree scalar_initial_def; 787169689Skan tree vec_initial_def; 788169689Skan tree orig_name; 789169689Skan imm_use_iterator imm_iter; 790169689Skan use_operand_p use_p; 791169689Skan bool extract_scalar_result; 792169689Skan tree reduction_op; 793169689Skan tree orig_stmt; 794169689Skan tree use_stmt; 795169689Skan tree operation = TREE_OPERAND (stmt, 1); 796169689Skan int op_type; 797169689Skan 798169689Skan op_type = TREE_CODE_LENGTH (TREE_CODE (operation)); 799169689Skan reduction_op = TREE_OPERAND (operation, op_type-1); 800169689Skan vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); 801169689Skan mode = TYPE_MODE (vectype); 802169689Skan 803169689Skan /*** 1. Create the reduction def-use cycle ***/ 804169689Skan 805169689Skan /* 1.1 set the loop-entry arg of the reduction-phi: */ 806169689Skan /* For the case of reduction, vect_get_vec_def_for_operand returns 807169689Skan the scalar def before the loop, that defines the initial value 808169689Skan of the reduction variable. 
*/ 809169689Skan vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, 810169689Skan &scalar_initial_def); 811169689Skan add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); 812169689Skan 813169689Skan /* 1.2 set the loop-latch arg for the reduction-phi: */ 814169689Skan add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); 815169689Skan 816169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 817169689Skan { 818169689Skan fprintf (vect_dump, "transform reduction: created def-use cycle:"); 819169689Skan print_generic_expr (vect_dump, reduction_phi, TDF_SLIM); 820169689Skan fprintf (vect_dump, "\n"); 821169689Skan print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM); 822169689Skan } 823169689Skan 824169689Skan 825169689Skan /*** 2. Create epilog code 826169689Skan The reduction epilog code operates across the elements of the vector 827169689Skan of partial results computed by the vectorized loop. 828169689Skan The reduction epilog code consists of: 829169689Skan step 1: compute the scalar result in a vector (v_out2) 830169689Skan step 2: extract the scalar result (s_out3) from the vector (v_out2) 831169689Skan step 3: adjust the scalar result (s_out3) if needed. 832169689Skan 833169689Skan Step 1 can be accomplished using one the following three schemes: 834169689Skan (scheme 1) using reduc_code, if available. 835169689Skan (scheme 2) using whole-vector shifts, if available. 836169689Skan (scheme 3) using a scalar loop. In this case steps 1+2 above are 837169689Skan combined. 838169689Skan 839169689Skan The overall epilog code looks like this: 840169689Skan 841169689Skan s_out0 = phi <s_loop> # original EXIT_PHI 842169689Skan v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI 843169689Skan v_out2 = reduce <v_out1> # step 1 844169689Skan s_out3 = extract_field <v_out2, 0> # step 2 845169689Skan s_out4 = adjust_result <s_out3> # step 3 846169689Skan 847169689Skan (step 3 is optional, and step2 1 and 2 may be combined). 
848169689Skan Lastly, the uses of s_out0 are replaced by s_out4. 849169689Skan 850169689Skan ***/ 851169689Skan 852169689Skan /* 2.1 Create new loop-exit-phi to preserve loop-closed form: 853169689Skan v_out1 = phi <v_loop> */ 854169689Skan 855169689Skan exit_bb = loop->single_exit->dest; 856169689Skan new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); 857169689Skan SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); 858169689Skan exit_bsi = bsi_start (exit_bb); 859169689Skan 860169689Skan /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 861169689Skan (i.e. when reduc_code is not available) and in the final adjustment code 862169689Skan (if needed). Also get the original scalar reduction variable as 863169689Skan defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it 864169689Skan represents a reduction pattern), the tree-code and scalar-def are 865169689Skan taken from the original stmt that the pattern-stmt (STMT) replaces. 866169689Skan Otherwise (it is a regular reduction) - the tree-code and scalar-def 867169689Skan are taken from STMT. 
*/ 868169689Skan 869169689Skan orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 870169689Skan if (!orig_stmt) 871169689Skan { 872169689Skan /* Regular reduction */ 873169689Skan orig_stmt = stmt; 874169689Skan } 875169689Skan else 876169689Skan { 877169689Skan /* Reduction pattern */ 878169689Skan stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt); 879169689Skan gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)); 880169689Skan gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 881169689Skan } 882169689Skan code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); 883169689Skan scalar_dest = TREE_OPERAND (orig_stmt, 0); 884169689Skan scalar_type = TREE_TYPE (scalar_dest); 885169689Skan new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); 886169689Skan bitsize = TYPE_SIZE (scalar_type); 887169689Skan bytesize = TYPE_SIZE_UNIT (scalar_type); 888169689Skan 889169689Skan /* 2.3 Create the reduction code, using one of the three schemes described 890169689Skan above. */ 891169689Skan 892169689Skan if (reduc_code < NUM_TREE_CODES) 893169689Skan { 894169689Skan /*** Case 1: Create: 895169689Skan v_out2 = reduc_expr <v_out1> */ 896169689Skan 897169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 898169689Skan fprintf (vect_dump, "Reduce using direct vector reduction."); 899169689Skan 900169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 901169689Skan epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 902169689Skan build1 (reduc_code, vectype, PHI_RESULT (new_phi))); 903169689Skan new_temp = make_ssa_name (vec_dest, epilog_stmt); 904169689Skan TREE_OPERAND (epilog_stmt, 0) = new_temp; 905169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 906169689Skan 907169689Skan extract_scalar_result = true; 908169689Skan } 909169689Skan else 910169689Skan { 911169689Skan enum tree_code shift_code = 0; 912169689Skan bool have_whole_vector_shift = true; 913169689Skan int bit_offset; 914169689Skan int element_bitsize = tree_low_cst 
(bitsize, 1); 915169689Skan int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); 916169689Skan tree vec_temp; 917169689Skan 918169689Skan if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing) 919169689Skan shift_code = VEC_RSHIFT_EXPR; 920169689Skan else 921169689Skan have_whole_vector_shift = false; 922169689Skan 923169689Skan /* Regardless of whether we have a whole vector shift, if we're 924169689Skan emulating the operation via tree-vect-generic, we don't want 925169689Skan to use it. Only the first round of the reduction is likely 926169689Skan to still be profitable via emulation. */ 927169689Skan /* ??? It might be better to emit a reduction tree code here, so that 928169689Skan tree-vect-generic can expand the first round via bit tricks. */ 929169689Skan if (!VECTOR_MODE_P (mode)) 930169689Skan have_whole_vector_shift = false; 931169689Skan else 932169689Skan { 933169689Skan optab optab = optab_for_tree_code (code, vectype); 934169689Skan if (optab->handlers[mode].insn_code == CODE_FOR_nothing) 935169689Skan have_whole_vector_shift = false; 936169689Skan } 937169689Skan 938169689Skan if (have_whole_vector_shift) 939169689Skan { 940169689Skan /*** Case 2: Create: 941169689Skan for (offset = VS/2; offset >= element_size; offset/=2) 942169689Skan { 943169689Skan Create: va' = vec_shift <va, offset> 944169689Skan Create: va = vop <va, va'> 945169689Skan } */ 946169689Skan 947169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 948169689Skan fprintf (vect_dump, "Reduce using vector shifts"); 949169689Skan 950169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 951169689Skan new_temp = PHI_RESULT (new_phi); 952169689Skan 953169689Skan for (bit_offset = vec_size_in_bits/2; 954169689Skan bit_offset >= element_bitsize; 955169689Skan bit_offset /= 2) 956169689Skan { 957169689Skan tree bitpos = size_int (bit_offset); 958169689Skan 959169689Skan epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 960169689Skan build2 
(shift_code, vectype, new_temp, bitpos)); 961169689Skan new_name = make_ssa_name (vec_dest, epilog_stmt); 962169689Skan TREE_OPERAND (epilog_stmt, 0) = new_name; 963169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 964169689Skan 965169689Skan epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 966169689Skan build2 (code, vectype, new_name, new_temp)); 967169689Skan new_temp = make_ssa_name (vec_dest, epilog_stmt); 968169689Skan TREE_OPERAND (epilog_stmt, 0) = new_temp; 969169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 970169689Skan } 971169689Skan 972169689Skan extract_scalar_result = true; 973169689Skan } 974169689Skan else 975169689Skan { 976169689Skan tree rhs; 977169689Skan 978169689Skan /*** Case 3: Create: 979169689Skan s = extract_field <v_out2, 0> 980169689Skan for (offset = element_size; 981169689Skan offset < vector_size; 982169689Skan offset += element_size;) 983169689Skan { 984169689Skan Create: s' = extract_field <v_out2, offset> 985169689Skan Create: s = op <s, s'> 986169689Skan } */ 987169689Skan 988169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 989169689Skan fprintf (vect_dump, "Reduce using scalar code. 
"); 990169689Skan 991169689Skan vec_temp = PHI_RESULT (new_phi); 992169689Skan vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); 993169689Skan rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 994169689Skan bitsize_zero_node); 995169689Skan BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 996169689Skan epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs); 997169689Skan new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 998169689Skan TREE_OPERAND (epilog_stmt, 0) = new_temp; 999169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1000169689Skan 1001169689Skan for (bit_offset = element_bitsize; 1002169689Skan bit_offset < vec_size_in_bits; 1003169689Skan bit_offset += element_bitsize) 1004169689Skan { 1005169689Skan tree bitpos = bitsize_int (bit_offset); 1006169689Skan tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 1007169689Skan bitpos); 1008169689Skan 1009169689Skan BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 1010169689Skan epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1011169689Skan rhs); 1012169689Skan new_name = make_ssa_name (new_scalar_dest, epilog_stmt); 1013169689Skan TREE_OPERAND (epilog_stmt, 0) = new_name; 1014169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1015169689Skan 1016169689Skan epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1017169689Skan build2 (code, scalar_type, new_name, new_temp)); 1018169689Skan new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1019169689Skan TREE_OPERAND (epilog_stmt, 0) = new_temp; 1020169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1021169689Skan } 1022169689Skan 1023169689Skan extract_scalar_result = false; 1024169689Skan } 1025169689Skan } 1026169689Skan 1027169689Skan /* 2.4 Extract the final scalar result. 
Create: 1028169689Skan s_out3 = extract_field <v_out2, bitpos> */ 1029169689Skan 1030169689Skan if (extract_scalar_result) 1031169689Skan { 1032169689Skan tree rhs; 1033169689Skan 1034169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1035169689Skan fprintf (vect_dump, "extract scalar result"); 1036169689Skan 1037169689Skan if (BYTES_BIG_ENDIAN) 1038169689Skan bitpos = size_binop (MULT_EXPR, 1039169689Skan bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), 1040169689Skan TYPE_SIZE (scalar_type)); 1041169689Skan else 1042169689Skan bitpos = bitsize_zero_node; 1043169689Skan 1044169689Skan rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos); 1045169689Skan BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 1046169689Skan epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs); 1047169689Skan new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1048169689Skan TREE_OPERAND (epilog_stmt, 0) = new_temp; 1049169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1050169689Skan } 1051169689Skan 1052169689Skan /* 2.4 Adjust the final result by the initial value of the reduction 1053169689Skan variable. (When such adjustment is not needed, then 1054169689Skan 'scalar_initial_def' is zero). 1055169689Skan 1056169689Skan Create: 1057169689Skan s_out4 = scalar_expr <s_out3, scalar_initial_def> */ 1058169689Skan 1059169689Skan if (scalar_initial_def) 1060169689Skan { 1061169689Skan epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1062169689Skan build2 (code, scalar_type, new_temp, scalar_initial_def)); 1063169689Skan new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1064169689Skan TREE_OPERAND (epilog_stmt, 0) = new_temp; 1065169689Skan bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1066169689Skan } 1067169689Skan 1068169689Skan /* 2.6 Replace uses of s_out0 with uses of s_out3 */ 1069169689Skan 1070169689Skan /* Find the loop-closed-use at the loop exit of the original scalar result. 
1071169689Skan (The reduction result is expected to have two immediate uses - one at the 1072169689Skan latch block, and one at the loop exit). */ 1073169689Skan exit_phi = NULL; 1074169689Skan FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) 1075169689Skan { 1076169689Skan if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p)))) 1077169689Skan { 1078169689Skan exit_phi = USE_STMT (use_p); 1079169689Skan break; 1080169689Skan } 1081169689Skan } 1082169689Skan /* We expect to have found an exit_phi because of loop-closed-ssa form. */ 1083169689Skan gcc_assert (exit_phi); 1084169689Skan /* Replace the uses: */ 1085169689Skan orig_name = PHI_RESULT (exit_phi); 1086169689Skan FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) 1087169689Skan FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 1088169689Skan SET_USE (use_p, new_temp); 1089169689Skan} 1090169689Skan 1091169689Skan 1092169689Skan/* Function vectorizable_reduction. 1093169689Skan 1094169689Skan Check if STMT performs a reduction operation that can be vectorized. 1095169689Skan If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1096169689Skan stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1097169689Skan Return FALSE if not a vectorizable STMT, TRUE otherwise. 1098169689Skan 1099169689Skan This function also handles reduction idioms (patterns) that have been 1100169689Skan recognized in advance during vect_pattern_recog. In this case, STMT may be 1101169689Skan of this form: 1102169689Skan X = pattern_expr (arg0, arg1, ..., X) 1103169689Skan and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original 1104169689Skan sequence that had been detected and replaced by the pattern-stmt (STMT). 1105169689Skan 1106169689Skan In some cases of reduction patterns, the type of the reduction variable X is 1107169689Skan different than the type of the other arguments of STMT. 
1108169689Skan In such cases, the vectype that is used when transforming STMT into a vector 1109169689Skan stmt is different than the vectype that is used to determine the 1110169689Skan vectorization factor, because it consists of a different number of elements 1111169689Skan than the actual number of elements that are being operated upon in parallel. 1112169689Skan 1113169689Skan For example, consider an accumulation of shorts into an int accumulator. 1114169689Skan On some targets it's possible to vectorize this pattern operating on 8 1115169689Skan shorts at a time (hence, the vectype for purposes of determining the 1116169689Skan vectorization factor should be V8HI); on the other hand, the vectype that 1117169689Skan is used to create the vector form is actually V4SI (the type of the result). 1118169689Skan 1119169689Skan Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that 1120169689Skan indicates what is the actual level of parallelism (V8HI in the example), so 1121169689Skan that the right vectorization factor would be derived. This vectype 1122169689Skan corresponds to the type of arguments to the reduction stmt, and should *NOT* 1123169689Skan be used to create the vectorized stmt. The right vectype for the vectorized 1124169689Skan stmt is obtained from the type of the result X: 1125169689Skan get_vectype_for_scalar_type (TREE_TYPE (X)) 1126169689Skan 1127169689Skan This means that, contrary to "regular" reductions (or "regular" stmts in 1128169689Skan general), the following equation: 1129169689Skan STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) 1130169689Skan does *NOT* necessarily hold for reduction patterns. 
*/ 1131169689Skan 1132169689Skanbool 1133169689Skanvectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1134169689Skan{ 1135169689Skan tree vec_dest; 1136169689Skan tree scalar_dest; 1137169689Skan tree op; 1138169689Skan tree loop_vec_def0, loop_vec_def1; 1139169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1140169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1141169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1142169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1143169689Skan tree operation; 1144169689Skan enum tree_code code, orig_code, epilog_reduc_code = 0; 1145169689Skan enum machine_mode vec_mode; 1146169689Skan int op_type; 1147169689Skan optab optab, reduc_optab; 1148169689Skan tree new_temp; 1149169689Skan tree def, def_stmt; 1150169689Skan enum vect_def_type dt; 1151169689Skan tree new_phi; 1152169689Skan tree scalar_type; 1153169689Skan bool is_simple_use; 1154169689Skan tree orig_stmt; 1155169689Skan stmt_vec_info orig_stmt_info; 1156169689Skan tree expr = NULL_TREE; 1157169689Skan int i; 1158169689Skan 1159169689Skan /* 1. Is vectorizable reduction? */ 1160169689Skan 1161169689Skan /* Not supportable if the reduction variable is used in the loop. */ 1162169689Skan if (STMT_VINFO_RELEVANT_P (stmt_info)) 1163169689Skan return false; 1164169689Skan 1165169689Skan if (!STMT_VINFO_LIVE_P (stmt_info)) 1166169689Skan return false; 1167169689Skan 1168169689Skan /* Make sure it was already recognized as a reduction computation. */ 1169169689Skan if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) 1170169689Skan return false; 1171169689Skan 1172169689Skan /* 2. Has this been recognized as a reduction pattern? 1173169689Skan 1174169689Skan Check if STMT represents a pattern that has been recognized 1175169689Skan in earlier analysis stages. 
For stmts that represent a pattern, 1176169689Skan the STMT_VINFO_RELATED_STMT field records the last stmt in 1177169689Skan the original sequence that constitutes the pattern. */ 1178169689Skan 1179169689Skan orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 1180169689Skan if (orig_stmt) 1181169689Skan { 1182169689Skan orig_stmt_info = vinfo_for_stmt (orig_stmt); 1183169689Skan gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt); 1184169689Skan gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); 1185169689Skan gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); 1186169689Skan } 1187169689Skan 1188169689Skan /* 3. Check the operands of the operation. The first operands are defined 1189169689Skan inside the loop body. The last operand is the reduction variable, 1190169689Skan which is defined by the loop-header-phi. */ 1191169689Skan 1192169689Skan gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); 1193169689Skan 1194169689Skan operation = TREE_OPERAND (stmt, 1); 1195169689Skan code = TREE_CODE (operation); 1196169689Skan op_type = TREE_CODE_LENGTH (code); 1197169689Skan 1198169689Skan if (op_type != binary_op && op_type != ternary_op) 1199169689Skan return false; 1200169689Skan scalar_dest = TREE_OPERAND (stmt, 0); 1201169689Skan scalar_type = TREE_TYPE (scalar_dest); 1202169689Skan 1203169689Skan /* All uses but the last are expected to be defined in the loop. 1204169689Skan The last use is the reduction variable. 
*/ 1205169689Skan for (i = 0; i < op_type-1; i++) 1206169689Skan { 1207169689Skan op = TREE_OPERAND (operation, i); 1208169689Skan is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 1209169689Skan gcc_assert (is_simple_use); 1210169689Skan gcc_assert (dt == vect_loop_def || dt == vect_invariant_def || 1211169689Skan dt == vect_constant_def); 1212169689Skan } 1213169689Skan 1214169689Skan op = TREE_OPERAND (operation, i); 1215169689Skan is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 1216169689Skan gcc_assert (is_simple_use); 1217169689Skan gcc_assert (dt == vect_reduction_def); 1218169689Skan gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); 1219169689Skan if (orig_stmt) 1220169689Skan gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt)); 1221169689Skan else 1222169689Skan gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt)); 1223169689Skan 1224169689Skan if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) 1225169689Skan return false; 1226169689Skan 1227169689Skan /* 4. Supportable by target? */ 1228169689Skan 1229169689Skan /* 4.1. 
check support for the operation in the loop */ 1230169689Skan optab = optab_for_tree_code (code, vectype); 1231169689Skan if (!optab) 1232169689Skan { 1233169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1234169689Skan fprintf (vect_dump, "no optab."); 1235169689Skan return false; 1236169689Skan } 1237169689Skan vec_mode = TYPE_MODE (vectype); 1238169689Skan if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) 1239169689Skan { 1240169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1241169689Skan fprintf (vect_dump, "op not supported by target."); 1242169689Skan if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 1243169689Skan || LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1244169689Skan < vect_min_worthwhile_factor (code)) 1245169689Skan return false; 1246169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1247169689Skan fprintf (vect_dump, "proceeding using word mode."); 1248169689Skan } 1249169689Skan 1250169689Skan /* Worthwhile without SIMD support? */ 1251169689Skan if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1252169689Skan && LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1253169689Skan < vect_min_worthwhile_factor (code)) 1254169689Skan { 1255169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1256169689Skan fprintf (vect_dump, "not worthwhile without SIMD support."); 1257169689Skan return false; 1258169689Skan } 1259169689Skan 1260169689Skan /* 4.2. Check support for the epilog operation. 1261169689Skan 1262169689Skan If STMT represents a reduction pattern, then the type of the 1263169689Skan reduction variable may be different than the type of the rest 1264169689Skan of the arguments. 
For example, consider the case of accumulation 1265169689Skan of shorts into an int accumulator; The original code: 1266169689Skan S1: int_a = (int) short_a; 1267169689Skan orig_stmt-> S2: int_acc = plus <int_a ,int_acc>; 1268169689Skan 1269169689Skan was replaced with: 1270169689Skan STMT: int_acc = widen_sum <short_a, int_acc> 1271169689Skan 1272169689Skan This means that: 1273169689Skan 1. The tree-code that is used to create the vector operation in the 1274169689Skan epilog code (that reduces the partial results) is not the 1275169689Skan tree-code of STMT, but is rather the tree-code of the original 1276169689Skan stmt from the pattern that STMT is replacing. I.e, in the example 1277169689Skan above we want to use 'widen_sum' in the loop, but 'plus' in the 1278169689Skan epilog. 1279169689Skan 2. The type (mode) we use to check available target support 1280169689Skan for the vector operation to be created in the *epilog*, is 1281169689Skan determined by the type of the reduction variable (in the example 1282169689Skan above we'd check this: plus_optab[vect_int_mode]). 1283169689Skan However the type (mode) we use to check available target support 1284169689Skan for the vector operation to be created *inside the loop*, is 1285169689Skan determined by the type of the other arguments to STMT (in the 1286169689Skan example we'd check this: widen_sum_optab[vect_short_mode]). 1287169689Skan 1288169689Skan This is contrary to "regular" reductions, in which the types of all 1289169689Skan the arguments are the same as the type of the reduction variable. 1290169689Skan For "regular" reductions we can therefore use the same vector type 1291169689Skan (and also the same tree-code) when generating the epilog code and 1292169689Skan when generating the code inside the loop. 
*/ 1293169689Skan 1294169689Skan if (orig_stmt) 1295169689Skan { 1296169689Skan /* This is a reduction pattern: get the vectype from the type of the 1297169689Skan reduction variable, and get the tree-code from orig_stmt. */ 1298169689Skan orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); 1299169689Skan vectype = get_vectype_for_scalar_type (TREE_TYPE (def)); 1300169689Skan vec_mode = TYPE_MODE (vectype); 1301169689Skan } 1302169689Skan else 1303169689Skan { 1304169689Skan /* Regular reduction: use the same vectype and tree-code as used for 1305169689Skan the vector code inside the loop can be used for the epilog code. */ 1306169689Skan orig_code = code; 1307169689Skan } 1308169689Skan 1309169689Skan if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) 1310169689Skan return false; 1311169689Skan reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype); 1312169689Skan if (!reduc_optab) 1313169689Skan { 1314169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1315169689Skan fprintf (vect_dump, "no optab for reduction."); 1316169689Skan epilog_reduc_code = NUM_TREE_CODES; 1317169689Skan } 1318169689Skan if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) 1319169689Skan { 1320169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1321169689Skan fprintf (vect_dump, "reduc op not supported by target."); 1322169689Skan epilog_reduc_code = NUM_TREE_CODES; 1323169689Skan } 1324169689Skan 1325169689Skan if (!vec_stmt) /* transformation not required. */ 1326169689Skan { 1327169689Skan STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; 1328169689Skan return true; 1329169689Skan } 1330169689Skan 1331169689Skan /** Transform. 
**/ 1332169689Skan 1333169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1334169689Skan fprintf (vect_dump, "transform reduction."); 1335169689Skan 1336169689Skan /* Create the destination vector */ 1337169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1338169689Skan 1339169689Skan /* Create the reduction-phi that defines the reduction-operand. */ 1340169689Skan new_phi = create_phi_node (vec_dest, loop->header); 1341169689Skan 1342169689Skan /* Prepare the operand that is defined inside the loop body */ 1343169689Skan op = TREE_OPERAND (operation, 0); 1344169689Skan loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL); 1345169689Skan if (op_type == binary_op) 1346169689Skan expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi)); 1347169689Skan else if (op_type == ternary_op) 1348169689Skan { 1349169689Skan op = TREE_OPERAND (operation, 1); 1350169689Skan loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL); 1351169689Skan expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1, 1352169689Skan PHI_RESULT (new_phi)); 1353169689Skan } 1354169689Skan 1355169689Skan /* Create the vectorized operation that computes the partial results */ 1356169689Skan *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr); 1357169689Skan new_temp = make_ssa_name (vec_dest, *vec_stmt); 1358169689Skan TREE_OPERAND (*vec_stmt, 0) = new_temp; 1359169689Skan vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1360169689Skan 1361169689Skan /* Finalize the reduction-phi (set it's arguments) and create the 1362169689Skan epilog reduction code. */ 1363169689Skan vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); 1364169689Skan return true; 1365169689Skan} 1366169689Skan 1367169689Skan 1368169689Skan/* Function vectorizable_assignment. 1369169689Skan 1370169689Skan Check if STMT performs an assignment (copy) that can be vectorized. 
1371169689Skan If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1372169689Skan stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1373169689Skan Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1374169689Skan 1375169689Skanbool 1376169689Skanvectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1377169689Skan{ 1378169689Skan tree vec_dest; 1379169689Skan tree scalar_dest; 1380169689Skan tree op; 1381169689Skan tree vec_oprnd; 1382169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1383169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1384169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1385169689Skan tree new_temp; 1386169689Skan tree def, def_stmt; 1387169689Skan enum vect_def_type dt; 1388169689Skan 1389169689Skan /* Is vectorizable assignment? */ 1390169689Skan if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1391169689Skan return false; 1392169689Skan 1393169689Skan gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1394169689Skan 1395169689Skan if (TREE_CODE (stmt) != MODIFY_EXPR) 1396169689Skan return false; 1397169689Skan 1398169689Skan scalar_dest = TREE_OPERAND (stmt, 0); 1399169689Skan if (TREE_CODE (scalar_dest) != SSA_NAME) 1400169689Skan return false; 1401169689Skan 1402169689Skan op = TREE_OPERAND (stmt, 1); 1403169689Skan if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1404169689Skan { 1405169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1406169689Skan fprintf (vect_dump, "use not simple."); 1407169689Skan return false; 1408169689Skan } 1409169689Skan 1410169689Skan if (!vec_stmt) /* transformation not required. */ 1411169689Skan { 1412169689Skan STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 1413169689Skan return true; 1414169689Skan } 1415169689Skan 1416169689Skan /** Transform. 
**/ 1417169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1418169689Skan fprintf (vect_dump, "transform assignment."); 1419169689Skan 1420169689Skan /* Handle def. */ 1421169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1422169689Skan 1423169689Skan /* Handle use. */ 1424169689Skan op = TREE_OPERAND (stmt, 1); 1425169689Skan vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); 1426169689Skan 1427169689Skan /* Arguments are ready. create the new vector stmt. */ 1428169689Skan *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); 1429169689Skan new_temp = make_ssa_name (vec_dest, *vec_stmt); 1430169689Skan TREE_OPERAND (*vec_stmt, 0) = new_temp; 1431169689Skan vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1432169689Skan 1433169689Skan return true; 1434169689Skan} 1435169689Skan 1436169689Skan 1437169689Skan/* Function vect_min_worthwhile_factor. 1438169689Skan 1439169689Skan For a loop where we could vectorize the operation indicated by CODE, 1440169689Skan return the minimum vectorization factor that makes it worthwhile 1441169689Skan to use generic vectors. */ 1442169689Skanstatic int 1443169689Skanvect_min_worthwhile_factor (enum tree_code code) 1444169689Skan{ 1445169689Skan switch (code) 1446169689Skan { 1447169689Skan case PLUS_EXPR: 1448169689Skan case MINUS_EXPR: 1449169689Skan case NEGATE_EXPR: 1450169689Skan return 4; 1451169689Skan 1452169689Skan case BIT_AND_EXPR: 1453169689Skan case BIT_IOR_EXPR: 1454169689Skan case BIT_XOR_EXPR: 1455169689Skan case BIT_NOT_EXPR: 1456169689Skan return 2; 1457169689Skan 1458169689Skan default: 1459169689Skan return INT_MAX; 1460169689Skan } 1461169689Skan} 1462169689Skan 1463169689Skan 1464169689Skan/* Function vectorizable_operation. 1465169689Skan 1466169689Skan Check if STMT performs a binary or unary operation that can be vectorized. 
1467169689Skan If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1468169689Skan stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1469169689Skan Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1470169689Skan 1471169689Skanbool 1472169689Skanvectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1473169689Skan{ 1474169689Skan tree vec_dest; 1475169689Skan tree scalar_dest; 1476169689Skan tree operation; 1477169689Skan tree op0, op1 = NULL; 1478169689Skan tree vec_oprnd0, vec_oprnd1=NULL; 1479169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1480169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1481169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1482169689Skan int i; 1483169689Skan enum tree_code code; 1484169689Skan enum machine_mode vec_mode; 1485169689Skan tree new_temp; 1486169689Skan int op_type; 1487169689Skan tree op; 1488169689Skan optab optab; 1489169689Skan int icode; 1490169689Skan enum machine_mode optab_op2_mode; 1491169689Skan tree def, def_stmt; 1492169689Skan enum vect_def_type dt; 1493169689Skan 1494169689Skan /* Is STMT a vectorizable binary/unary operation? */ 1495169689Skan if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1496169689Skan return false; 1497169689Skan 1498169689Skan gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1499169689Skan 1500169689Skan if (STMT_VINFO_LIVE_P (stmt_info)) 1501169689Skan { 1502169689Skan /* FORNOW: not yet supported. 
*/ 1503169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1504169689Skan fprintf (vect_dump, "value used after loop."); 1505169689Skan return false; 1506169689Skan } 1507169689Skan 1508169689Skan if (TREE_CODE (stmt) != MODIFY_EXPR) 1509169689Skan return false; 1510169689Skan 1511169689Skan if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) 1512169689Skan return false; 1513169689Skan 1514169689Skan operation = TREE_OPERAND (stmt, 1); 1515169689Skan code = TREE_CODE (operation); 1516169689Skan optab = optab_for_tree_code (code, vectype); 1517169689Skan 1518169689Skan /* Support only unary or binary operations. */ 1519169689Skan op_type = TREE_CODE_LENGTH (code); 1520169689Skan if (op_type != unary_op && op_type != binary_op) 1521169689Skan { 1522169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1523169689Skan fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type); 1524169689Skan return false; 1525169689Skan } 1526169689Skan 1527169689Skan for (i = 0; i < op_type; i++) 1528169689Skan { 1529169689Skan op = TREE_OPERAND (operation, i); 1530169689Skan if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1531169689Skan { 1532169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1533169689Skan fprintf (vect_dump, "use not simple."); 1534169689Skan return false; 1535169689Skan } 1536169689Skan } 1537169689Skan 1538169689Skan /* Supportable by target? 
*/ 1539169689Skan if (!optab) 1540169689Skan { 1541169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1542169689Skan fprintf (vect_dump, "no optab."); 1543169689Skan return false; 1544169689Skan } 1545169689Skan vec_mode = TYPE_MODE (vectype); 1546169689Skan icode = (int) optab->handlers[(int) vec_mode].insn_code; 1547169689Skan if (icode == CODE_FOR_nothing) 1548169689Skan { 1549169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1550169689Skan fprintf (vect_dump, "op not supported by target."); 1551169689Skan if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 1552169689Skan || LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1553169689Skan < vect_min_worthwhile_factor (code)) 1554169689Skan return false; 1555169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1556169689Skan fprintf (vect_dump, "proceeding using word mode."); 1557169689Skan } 1558169689Skan 1559169689Skan /* Worthwhile without SIMD support? */ 1560169689Skan if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1561169689Skan && LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1562169689Skan < vect_min_worthwhile_factor (code)) 1563169689Skan { 1564169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1565169689Skan fprintf (vect_dump, "not worthwhile without SIMD support."); 1566169689Skan return false; 1567169689Skan } 1568169689Skan 1569169689Skan if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) 1570169689Skan { 1571169689Skan /* FORNOW: not yet supported. */ 1572169689Skan if (!VECTOR_MODE_P (vec_mode)) 1573169689Skan return false; 1574169689Skan 1575169689Skan /* Invariant argument is needed for a vector shift 1576169689Skan by a scalar shift operand. */ 1577169689Skan optab_op2_mode = insn_data[icode].operand[2].mode; 1578169689Skan if (! 
(VECTOR_MODE_P (optab_op2_mode) 1579169689Skan || dt == vect_constant_def 1580169689Skan || dt == vect_invariant_def)) 1581169689Skan { 1582169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1583169689Skan fprintf (vect_dump, "operand mode requires invariant argument."); 1584169689Skan return false; 1585169689Skan } 1586169689Skan } 1587169689Skan 1588169689Skan if (!vec_stmt) /* transformation not required. */ 1589169689Skan { 1590169689Skan STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 1591169689Skan return true; 1592169689Skan } 1593169689Skan 1594169689Skan /** Transform. **/ 1595169689Skan 1596169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1597169689Skan fprintf (vect_dump, "transform binary/unary operation."); 1598169689Skan 1599169689Skan /* Handle def. */ 1600169689Skan scalar_dest = TREE_OPERAND (stmt, 0); 1601169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1602169689Skan 1603169689Skan /* Handle uses. */ 1604169689Skan op0 = TREE_OPERAND (operation, 0); 1605169689Skan vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 1606169689Skan 1607169689Skan if (op_type == binary_op) 1608169689Skan { 1609169689Skan op1 = TREE_OPERAND (operation, 1); 1610169689Skan 1611169689Skan if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) 1612169689Skan { 1613169689Skan /* Vector shl and shr insn patterns can be defined with 1614169689Skan scalar operand 2 (shift operand). In this case, use 1615169689Skan constant or loop invariant op1 directly, without 1616169689Skan extending it to vector mode first. 
*/ 1617169689Skan 1618169689Skan optab_op2_mode = insn_data[icode].operand[2].mode; 1619169689Skan if (!VECTOR_MODE_P (optab_op2_mode)) 1620169689Skan { 1621169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1622169689Skan fprintf (vect_dump, "operand 1 using scalar mode."); 1623169689Skan vec_oprnd1 = op1; 1624169689Skan } 1625169689Skan } 1626169689Skan 1627169689Skan if (!vec_oprnd1) 1628169689Skan vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); 1629169689Skan } 1630169689Skan 1631169689Skan /* Arguments are ready. create the new vector stmt. */ 1632169689Skan 1633169689Skan if (op_type == binary_op) 1634169689Skan *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1635169689Skan build2 (code, vectype, vec_oprnd0, vec_oprnd1)); 1636169689Skan else 1637169689Skan *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1638169689Skan build1 (code, vectype, vec_oprnd0)); 1639169689Skan new_temp = make_ssa_name (vec_dest, *vec_stmt); 1640169689Skan TREE_OPERAND (*vec_stmt, 0) = new_temp; 1641169689Skan vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1642169689Skan 1643169689Skan return true; 1644169689Skan} 1645169689Skan 1646169689Skan 1647169689Skan/* Function vectorizable_store. 1648169689Skan 1649169689Skan Check if STMT defines a non scalar data-ref (array/pointer/structure) that 1650169689Skan can be vectorized. 1651169689Skan If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1652169689Skan stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1653169689Skan Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ 1654169689Skan 1655169689Skanbool 1656169689Skanvectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1657169689Skan{ 1658169689Skan tree scalar_dest; 1659169689Skan tree data_ref; 1660169689Skan tree op; 1661169689Skan tree vec_oprnd1; 1662169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1663169689Skan struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1664169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1665169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1666169689Skan enum machine_mode vec_mode; 1667169689Skan tree dummy; 1668169689Skan enum dr_alignment_support alignment_support_cheme; 1669169689Skan ssa_op_iter iter; 1670169689Skan tree def, def_stmt; 1671169689Skan enum vect_def_type dt; 1672169689Skan 1673169689Skan /* Is vectorizable store? */ 1674169689Skan 1675169689Skan if (TREE_CODE (stmt) != MODIFY_EXPR) 1676169689Skan return false; 1677169689Skan 1678169689Skan scalar_dest = TREE_OPERAND (stmt, 0); 1679169689Skan if (TREE_CODE (scalar_dest) != ARRAY_REF 1680169689Skan && TREE_CODE (scalar_dest) != INDIRECT_REF) 1681169689Skan return false; 1682169689Skan 1683169689Skan op = TREE_OPERAND (stmt, 1); 1684169689Skan if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1685169689Skan { 1686169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1687169689Skan fprintf (vect_dump, "use not simple."); 1688169689Skan return false; 1689169689Skan } 1690169689Skan 1691169689Skan vec_mode = TYPE_MODE (vectype); 1692169689Skan /* FORNOW. In some cases can vectorize even if data-type not supported 1693169689Skan (e.g. - array initialization with 0). */ 1694169689Skan if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) 1695169689Skan return false; 1696169689Skan 1697169689Skan if (!STMT_VINFO_DATA_REF (stmt_info)) 1698169689Skan return false; 1699169689Skan 1700169689Skan 1701169689Skan if (!vec_stmt) /* transformation not required. 
*/ 1702169689Skan { 1703169689Skan STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 1704169689Skan return true; 1705169689Skan } 1706169689Skan 1707169689Skan /** Transform. **/ 1708169689Skan 1709169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1710169689Skan fprintf (vect_dump, "transform store"); 1711169689Skan 1712169689Skan alignment_support_cheme = vect_supportable_dr_alignment (dr); 1713169689Skan gcc_assert (alignment_support_cheme); 1714169689Skan gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ 1715169689Skan 1716169689Skan /* Handle use - get the vectorized def from the defining stmt. */ 1717169689Skan vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL); 1718169689Skan 1719169689Skan /* Handle def. */ 1720169689Skan /* FORNOW: make sure the data reference is aligned. */ 1721169689Skan vect_align_data_ref (stmt); 1722169689Skan data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); 1723169689Skan data_ref = build_fold_indirect_ref (data_ref); 1724169689Skan 1725169689Skan /* Arguments are ready. create the new vector stmt. */ 1726169689Skan *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); 1727169689Skan vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1728169689Skan 1729169689Skan /* Copy the V_MAY_DEFS representing the aliasing of the original array 1730169689Skan element's definition to the vector's definition then update the 1731169689Skan defining statement. The original is being deleted so the same 1732169689Skan SSA_NAMEs can be used. */ 1733169689Skan copy_virtual_operands (*vec_stmt, stmt); 1734169689Skan 1735169689Skan FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF) 1736169689Skan { 1737169689Skan SSA_NAME_DEF_STMT (def) = *vec_stmt; 1738169689Skan 1739169689Skan /* If this virtual def has a use outside the loop and a loop peel is 1740169689Skan performed then the def may be renamed by the peel. 
Mark it for 1741169689Skan renaming so the later use will also be renamed. */ 1742169689Skan mark_sym_for_renaming (SSA_NAME_VAR (def)); 1743169689Skan } 1744169689Skan 1745169689Skan return true; 1746169689Skan} 1747169689Skan 1748169689Skan 1749169689Skan/* vectorizable_load. 1750169689Skan 1751169689Skan Check if STMT reads a non scalar data-ref (array/pointer/structure) that 1752169689Skan can be vectorized. 1753169689Skan If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1754169689Skan stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1755169689Skan Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1756169689Skan 1757169689Skanbool 1758169689Skanvectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1759169689Skan{ 1760169689Skan tree scalar_dest; 1761169689Skan tree vec_dest = NULL; 1762169689Skan tree data_ref = NULL; 1763169689Skan tree op; 1764169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1765169689Skan struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1766169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1767169689Skan tree new_temp; 1768169689Skan int mode; 1769169689Skan tree init_addr; 1770169689Skan tree new_stmt; 1771169689Skan tree dummy; 1772169689Skan basic_block new_bb; 1773169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1774169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1775169689Skan edge pe = loop_preheader_edge (loop); 1776169689Skan enum dr_alignment_support alignment_support_cheme; 1777169689Skan 1778169689Skan /* Is vectorizable load? */ 1779169689Skan if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1780169689Skan return false; 1781169689Skan 1782169689Skan gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1783169689Skan 1784169689Skan if (STMT_VINFO_LIVE_P (stmt_info)) 1785169689Skan { 1786169689Skan /* FORNOW: not yet supported. 
*/ 1787169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1788169689Skan fprintf (vect_dump, "value used after loop."); 1789169689Skan return false; 1790169689Skan } 1791169689Skan 1792169689Skan if (TREE_CODE (stmt) != MODIFY_EXPR) 1793169689Skan return false; 1794169689Skan 1795169689Skan scalar_dest = TREE_OPERAND (stmt, 0); 1796169689Skan if (TREE_CODE (scalar_dest) != SSA_NAME) 1797169689Skan return false; 1798169689Skan 1799169689Skan op = TREE_OPERAND (stmt, 1); 1800169689Skan if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) 1801169689Skan return false; 1802169689Skan 1803169689Skan if (!STMT_VINFO_DATA_REF (stmt_info)) 1804169689Skan return false; 1805169689Skan 1806169689Skan mode = (int) TYPE_MODE (vectype); 1807169689Skan 1808169689Skan /* FORNOW. In some cases can vectorize even if data-type not supported 1809169689Skan (e.g. - data copies). */ 1810169689Skan if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) 1811169689Skan { 1812169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1813169689Skan fprintf (vect_dump, "Aligned load, but unsupported type."); 1814169689Skan return false; 1815169689Skan } 1816169689Skan 1817169689Skan if (!vec_stmt) /* transformation not required. */ 1818169689Skan { 1819169689Skan STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 1820169689Skan return true; 1821169689Skan } 1822169689Skan 1823169689Skan /** Transform. 
**/ 1824169689Skan 1825169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1826169689Skan fprintf (vect_dump, "transform load."); 1827169689Skan 1828169689Skan alignment_support_cheme = vect_supportable_dr_alignment (dr); 1829169689Skan gcc_assert (alignment_support_cheme); 1830169689Skan 1831169689Skan if (alignment_support_cheme == dr_aligned 1832169689Skan || alignment_support_cheme == dr_unaligned_supported) 1833169689Skan { 1834169689Skan /* Create: 1835169689Skan p = initial_addr; 1836169689Skan indx = 0; 1837169689Skan loop { 1838169689Skan vec_dest = *(p); 1839169689Skan indx = indx + 1; 1840169689Skan } 1841169689Skan */ 1842169689Skan 1843169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1844169689Skan data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); 1845169689Skan if (aligned_access_p (dr)) 1846169689Skan data_ref = build_fold_indirect_ref (data_ref); 1847169689Skan else 1848169689Skan { 1849169689Skan int mis = DR_MISALIGNMENT (dr); 1850169689Skan tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 1851169689Skan tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT)); 1852169689Skan data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); 1853169689Skan } 1854169689Skan new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1855169689Skan new_temp = make_ssa_name (vec_dest, new_stmt); 1856169689Skan TREE_OPERAND (new_stmt, 0) = new_temp; 1857169689Skan vect_finish_stmt_generation (stmt, new_stmt, bsi); 1858169689Skan copy_virtual_operands (new_stmt, stmt); 1859169689Skan } 1860169689Skan else if (alignment_support_cheme == dr_unaligned_software_pipeline) 1861169689Skan { 1862169689Skan /* Create: 1863169689Skan p1 = initial_addr; 1864169689Skan msq_init = *(floor(p1)) 1865169689Skan p2 = initial_addr + VS - 1; 1866169689Skan magic = have_builtin ? 
builtin_result : initial_address; 1867169689Skan indx = 0; 1868169689Skan loop { 1869169689Skan p2' = p2 + indx * vectype_size 1870169689Skan lsq = *(floor(p2')) 1871169689Skan vec_dest = realign_load (msq, lsq, magic) 1872169689Skan indx = indx + 1; 1873169689Skan msq = lsq; 1874169689Skan } 1875169689Skan */ 1876169689Skan 1877169689Skan tree offset; 1878169689Skan tree magic; 1879169689Skan tree phi_stmt; 1880169689Skan tree msq_init; 1881169689Skan tree msq, lsq; 1882169689Skan tree dataref_ptr; 1883169689Skan tree params; 1884169689Skan 1885169689Skan /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ 1886169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1887169689Skan data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, 1888169689Skan &init_addr, true); 1889169689Skan data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); 1890169689Skan new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1891169689Skan new_temp = make_ssa_name (vec_dest, new_stmt); 1892169689Skan TREE_OPERAND (new_stmt, 0) = new_temp; 1893169689Skan new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); 1894169689Skan gcc_assert (!new_bb); 1895169689Skan msq_init = TREE_OPERAND (new_stmt, 0); 1896169689Skan copy_virtual_operands (new_stmt, stmt); 1897169689Skan update_vuses_to_preheader (new_stmt, loop); 1898169689Skan 1899169689Skan 1900169689Skan /* <2> Create lsq = *(floor(p2')) in the loop */ 1901169689Skan offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 1902169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1903169689Skan dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); 1904169689Skan data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 1905169689Skan new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1906169689Skan new_temp = make_ssa_name (vec_dest, new_stmt); 1907169689Skan TREE_OPERAND (new_stmt, 0) = new_temp; 1908169689Skan vect_finish_stmt_generation 
(stmt, new_stmt, bsi); 1909169689Skan lsq = TREE_OPERAND (new_stmt, 0); 1910169689Skan copy_virtual_operands (new_stmt, stmt); 1911169689Skan 1912169689Skan 1913169689Skan /* <3> */ 1914169689Skan if (targetm.vectorize.builtin_mask_for_load) 1915169689Skan { 1916169689Skan /* Create permutation mask, if required, in loop preheader. */ 1917169689Skan tree builtin_decl; 1918169689Skan params = build_tree_list (NULL_TREE, init_addr); 1919169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1920169689Skan builtin_decl = targetm.vectorize.builtin_mask_for_load (); 1921169689Skan new_stmt = build_function_call_expr (builtin_decl, params); 1922169689Skan new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); 1923169689Skan new_temp = make_ssa_name (vec_dest, new_stmt); 1924169689Skan TREE_OPERAND (new_stmt, 0) = new_temp; 1925169689Skan new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); 1926169689Skan gcc_assert (!new_bb); 1927169689Skan magic = TREE_OPERAND (new_stmt, 0); 1928169689Skan 1929169689Skan /* The result of the CALL_EXPR to this builtin is determined from 1930169689Skan the value of the parameter and no global variables are touched 1931169689Skan which makes the builtin a "const" function. Requiring the 1932169689Skan builtin to have the "const" attribute makes it unnecessary 1933169689Skan to call mark_call_clobbered. */ 1934169689Skan gcc_assert (TREE_READONLY (builtin_decl)); 1935169689Skan } 1936169689Skan else 1937169689Skan { 1938169689Skan /* Use current address instead of init_addr for reduced reg pressure. 
1939169689Skan */ 1940169689Skan magic = dataref_ptr; 1941169689Skan } 1942169689Skan 1943169689Skan 1944169689Skan /* <4> Create msq = phi <msq_init, lsq> in loop */ 1945169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1946169689Skan msq = make_ssa_name (vec_dest, NULL_TREE); 1947169689Skan phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ 1948169689Skan SSA_NAME_DEF_STMT (msq) = phi_stmt; 1949169689Skan add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop)); 1950169689Skan add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop)); 1951169689Skan 1952169689Skan 1953169689Skan /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */ 1954169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 1955169689Skan new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); 1956169689Skan new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); 1957169689Skan new_temp = make_ssa_name (vec_dest, new_stmt); 1958169689Skan TREE_OPERAND (new_stmt, 0) = new_temp; 1959169689Skan vect_finish_stmt_generation (stmt, new_stmt, bsi); 1960169689Skan } 1961169689Skan else 1962169689Skan gcc_unreachable (); 1963169689Skan 1964169689Skan *vec_stmt = new_stmt; 1965169689Skan return true; 1966169689Skan} 1967169689Skan 1968169689Skan 1969169689Skan/* Function vectorizable_live_operation. 1970169689Skan 1971169689Skan STMT computes a value that is used outside the loop. Check if 1972169689Skan it can be supported. 
*/ 1973169689Skan 1974169689Skanbool 1975169689Skanvectorizable_live_operation (tree stmt, 1976169689Skan block_stmt_iterator *bsi ATTRIBUTE_UNUSED, 1977169689Skan tree *vec_stmt ATTRIBUTE_UNUSED) 1978169689Skan{ 1979169689Skan tree operation; 1980169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1981169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1982169689Skan int i; 1983169689Skan enum tree_code code; 1984169689Skan int op_type; 1985169689Skan tree op; 1986169689Skan tree def, def_stmt; 1987169689Skan enum vect_def_type dt; 1988169689Skan 1989169689Skan if (!STMT_VINFO_LIVE_P (stmt_info)) 1990169689Skan return false; 1991169689Skan 1992169689Skan if (TREE_CODE (stmt) != MODIFY_EXPR) 1993169689Skan return false; 1994169689Skan 1995169689Skan if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) 1996169689Skan return false; 1997169689Skan 1998169689Skan operation = TREE_OPERAND (stmt, 1); 1999169689Skan code = TREE_CODE (operation); 2000169689Skan 2001169689Skan op_type = TREE_CODE_LENGTH (code); 2002169689Skan 2003169689Skan /* FORNOW: support only if all uses are invariant. This means 2004169689Skan that the scalar operations can remain in place, unvectorized. 2005169689Skan The original last scalar value that they compute will be used. */ 2006169689Skan 2007169689Skan for (i = 0; i < op_type; i++) 2008169689Skan { 2009169689Skan op = TREE_OPERAND (operation, i); 2010169689Skan if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 2011169689Skan { 2012169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2013169689Skan fprintf (vect_dump, "use not simple."); 2014169689Skan return false; 2015169689Skan } 2016169689Skan 2017169689Skan if (dt != vect_invariant_def && dt != vect_constant_def) 2018169689Skan return false; 2019169689Skan } 2020169689Skan 2021169689Skan /* No transformation is required for the cases we currently support. 
*/ 2022169689Skan return true; 2023169689Skan} 2024169689Skan 2025169689Skan 2026169689Skan/* Function vect_is_simple_cond. 2027169689Skan 2028169689Skan Input: 2029169689Skan LOOP - the loop that is being vectorized. 2030169689Skan COND - Condition that is checked for simple use. 2031169689Skan 2032169689Skan Returns whether a COND can be vectorized. Checks whether 2033169689Skan condition operands are supportable using vec_is_simple_use. */ 2034169689Skan 2035169689Skanstatic bool 2036169689Skanvect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) 2037169689Skan{ 2038169689Skan tree lhs, rhs; 2039169689Skan tree def; 2040169689Skan enum vect_def_type dt; 2041169689Skan 2042169689Skan if (!COMPARISON_CLASS_P (cond)) 2043169689Skan return false; 2044169689Skan 2045169689Skan lhs = TREE_OPERAND (cond, 0); 2046169689Skan rhs = TREE_OPERAND (cond, 1); 2047169689Skan 2048169689Skan if (TREE_CODE (lhs) == SSA_NAME) 2049169689Skan { 2050169689Skan tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 2051169689Skan if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt)) 2052169689Skan return false; 2053169689Skan } 2054169689Skan else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST) 2055169689Skan return false; 2056169689Skan 2057169689Skan if (TREE_CODE (rhs) == SSA_NAME) 2058169689Skan { 2059169689Skan tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 2060169689Skan if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt)) 2061169689Skan return false; 2062169689Skan } 2063169689Skan else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST) 2064169689Skan return false; 2065169689Skan 2066169689Skan return true; 2067169689Skan} 2068169689Skan 2069169689Skan/* vectorizable_condition. 2070169689Skan 2071169689Skan Check if STMT is conditional modify expression that can be vectorized. 
2072169689Skan If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2073169689Skan stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 2074169689Skan at BSI. 2075169689Skan 2076169689Skan Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2077169689Skan 2078169689Skanbool 2079169689Skanvectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 2080169689Skan{ 2081169689Skan tree scalar_dest = NULL_TREE; 2082169689Skan tree vec_dest = NULL_TREE; 2083169689Skan tree op = NULL_TREE; 2084169689Skan tree cond_expr, then_clause, else_clause; 2085169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2086169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2087169689Skan tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause; 2088169689Skan tree vec_compare, vec_cond_expr; 2089169689Skan tree new_temp; 2090169689Skan loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2091169689Skan enum machine_mode vec_mode; 2092169689Skan tree def; 2093169689Skan enum vect_def_type dt; 2094169689Skan 2095169689Skan if (!STMT_VINFO_RELEVANT_P (stmt_info)) 2096169689Skan return false; 2097169689Skan 2098169689Skan gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 2099169689Skan 2100169689Skan if (STMT_VINFO_LIVE_P (stmt_info)) 2101169689Skan { 2102169689Skan /* FORNOW: not yet supported. 
*/ 2103169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2104169689Skan fprintf (vect_dump, "value used after loop."); 2105169689Skan return false; 2106169689Skan } 2107169689Skan 2108169689Skan if (TREE_CODE (stmt) != MODIFY_EXPR) 2109169689Skan return false; 2110169689Skan 2111169689Skan op = TREE_OPERAND (stmt, 1); 2112169689Skan 2113169689Skan if (TREE_CODE (op) != COND_EXPR) 2114169689Skan return false; 2115169689Skan 2116169689Skan cond_expr = TREE_OPERAND (op, 0); 2117169689Skan then_clause = TREE_OPERAND (op, 1); 2118169689Skan else_clause = TREE_OPERAND (op, 2); 2119169689Skan 2120169689Skan if (!vect_is_simple_cond (cond_expr, loop_vinfo)) 2121169689Skan return false; 2122169689Skan 2123169689Skan /* We do not handle two different vector types for the condition 2124169689Skan and the values. */ 2125169689Skan if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype)) 2126169689Skan return false; 2127169689Skan 2128169689Skan if (TREE_CODE (then_clause) == SSA_NAME) 2129169689Skan { 2130169689Skan tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause); 2131169689Skan if (!vect_is_simple_use (then_clause, loop_vinfo, 2132169689Skan &then_def_stmt, &def, &dt)) 2133169689Skan return false; 2134169689Skan } 2135169689Skan else if (TREE_CODE (then_clause) != INTEGER_CST 2136169689Skan && TREE_CODE (then_clause) != REAL_CST) 2137169689Skan return false; 2138169689Skan 2139169689Skan if (TREE_CODE (else_clause) == SSA_NAME) 2140169689Skan { 2141169689Skan tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause); 2142169689Skan if (!vect_is_simple_use (else_clause, loop_vinfo, 2143169689Skan &else_def_stmt, &def, &dt)) 2144169689Skan return false; 2145169689Skan } 2146169689Skan else if (TREE_CODE (else_clause) != INTEGER_CST 2147169689Skan && TREE_CODE (else_clause) != REAL_CST) 2148169689Skan return false; 2149169689Skan 2150169689Skan 2151169689Skan vec_mode = TYPE_MODE (vectype); 2152169689Skan 2153169689Skan if (!vec_stmt) 2154169689Skan { 
2155169689Skan STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 2156169689Skan return expand_vec_cond_expr_p (op, vec_mode); 2157169689Skan } 2158169689Skan 2159169689Skan /* Transform */ 2160169689Skan 2161169689Skan /* Handle def. */ 2162169689Skan scalar_dest = TREE_OPERAND (stmt, 0); 2163169689Skan vec_dest = vect_create_destination_var (scalar_dest, vectype); 2164169689Skan 2165169689Skan /* Handle cond expr. */ 2166169689Skan vec_cond_lhs = 2167169689Skan vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); 2168169689Skan vec_cond_rhs = 2169169689Skan vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); 2170169689Skan vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); 2171169689Skan vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); 2172169689Skan 2173169689Skan /* Arguments are ready. create the new vector stmt. */ 2174169689Skan vec_compare = build2 (TREE_CODE (cond_expr), vectype, 2175169689Skan vec_cond_lhs, vec_cond_rhs); 2176169689Skan vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 2177169689Skan vec_compare, vec_then_clause, vec_else_clause); 2178169689Skan 2179169689Skan *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr); 2180169689Skan new_temp = make_ssa_name (vec_dest, *vec_stmt); 2181169689Skan TREE_OPERAND (*vec_stmt, 0) = new_temp; 2182169689Skan vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 2183169689Skan 2184169689Skan return true; 2185169689Skan} 2186169689Skan 2187169689Skan/* Function vect_transform_stmt. 2188169689Skan 2189169689Skan Create a vectorized stmt to replace STMT, and insert it at BSI. 
*/ 2190169689Skan 2191169689Skanbool 2192169689Skanvect_transform_stmt (tree stmt, block_stmt_iterator *bsi) 2193169689Skan{ 2194169689Skan bool is_store = false; 2195169689Skan tree vec_stmt = NULL_TREE; 2196169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2197169689Skan tree orig_stmt_in_pattern; 2198169689Skan bool done; 2199169689Skan 2200169689Skan if (STMT_VINFO_RELEVANT_P (stmt_info)) 2201169689Skan { 2202169689Skan switch (STMT_VINFO_TYPE (stmt_info)) 2203169689Skan { 2204169689Skan case op_vec_info_type: 2205169689Skan done = vectorizable_operation (stmt, bsi, &vec_stmt); 2206169689Skan gcc_assert (done); 2207169689Skan break; 2208169689Skan 2209169689Skan case assignment_vec_info_type: 2210169689Skan done = vectorizable_assignment (stmt, bsi, &vec_stmt); 2211169689Skan gcc_assert (done); 2212169689Skan break; 2213169689Skan 2214169689Skan case load_vec_info_type: 2215169689Skan done = vectorizable_load (stmt, bsi, &vec_stmt); 2216169689Skan gcc_assert (done); 2217169689Skan break; 2218169689Skan 2219169689Skan case store_vec_info_type: 2220169689Skan done = vectorizable_store (stmt, bsi, &vec_stmt); 2221169689Skan gcc_assert (done); 2222169689Skan is_store = true; 2223169689Skan break; 2224169689Skan 2225169689Skan case condition_vec_info_type: 2226169689Skan done = vectorizable_condition (stmt, bsi, &vec_stmt); 2227169689Skan gcc_assert (done); 2228169689Skan break; 2229169689Skan 2230169689Skan default: 2231169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2232169689Skan fprintf (vect_dump, "stmt not supported."); 2233169689Skan gcc_unreachable (); 2234169689Skan } 2235169689Skan 2236169689Skan gcc_assert (vec_stmt); 2237169689Skan STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 2238169689Skan orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); 2239169689Skan if (orig_stmt_in_pattern) 2240169689Skan { 2241169689Skan stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); 2242169689Skan if (STMT_VINFO_IN_PATTERN_P 
(stmt_vinfo)) 2243169689Skan { 2244169689Skan gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 2245169689Skan 2246169689Skan /* STMT was inserted by the vectorizer to replace a computation 2247169689Skan idiom. ORIG_STMT_IN_PATTERN is a stmt in the original 2248169689Skan sequence that computed this idiom. We need to record a pointer 2249169689Skan to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more 2250169689Skan detail in the documentation of vect_pattern_recog. */ 2251169689Skan 2252169689Skan STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; 2253169689Skan } 2254169689Skan } 2255169689Skan } 2256169689Skan 2257169689Skan if (STMT_VINFO_LIVE_P (stmt_info)) 2258169689Skan { 2259169689Skan switch (STMT_VINFO_TYPE (stmt_info)) 2260169689Skan { 2261169689Skan case reduc_vec_info_type: 2262169689Skan done = vectorizable_reduction (stmt, bsi, &vec_stmt); 2263169689Skan gcc_assert (done); 2264169689Skan break; 2265169689Skan 2266169689Skan default: 2267169689Skan done = vectorizable_live_operation (stmt, bsi, &vec_stmt); 2268169689Skan gcc_assert (done); 2269169689Skan } 2270169689Skan 2271169689Skan if (vec_stmt) 2272169689Skan { 2273169689Skan gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info)); 2274169689Skan STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 2275169689Skan } 2276169689Skan } 2277169689Skan 2278169689Skan return is_store; 2279169689Skan} 2280169689Skan 2281169689Skan 2282169689Skan/* This function builds ni_name = number of iterations loop executes 2283169689Skan on the loop preheader. 
*/ 2284169689Skan 2285169689Skanstatic tree 2286169689Skanvect_build_loop_niters (loop_vec_info loop_vinfo) 2287169689Skan{ 2288169689Skan tree ni_name, stmt, var; 2289169689Skan edge pe; 2290169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2291169689Skan tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); 2292169689Skan 2293169689Skan var = create_tmp_var (TREE_TYPE (ni), "niters"); 2294169689Skan add_referenced_var (var); 2295169689Skan ni_name = force_gimple_operand (ni, &stmt, false, var); 2296169689Skan 2297169689Skan pe = loop_preheader_edge (loop); 2298169689Skan if (stmt) 2299169689Skan { 2300169689Skan basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2301169689Skan gcc_assert (!new_bb); 2302169689Skan } 2303169689Skan 2304169689Skan return ni_name; 2305169689Skan} 2306169689Skan 2307169689Skan 2308169689Skan/* This function generates the following statements: 2309169689Skan 2310169689Skan ni_name = number of iterations loop executes 2311169689Skan ratio = ni_name / vf 2312169689Skan ratio_mult_vf_name = ratio * vf 2313169689Skan 2314169689Skan and places them at the loop preheader edge. */ 2315169689Skan 2316169689Skanstatic void 2317169689Skanvect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, 2318169689Skan tree *ni_name_ptr, 2319169689Skan tree *ratio_mult_vf_name_ptr, 2320169689Skan tree *ratio_name_ptr) 2321169689Skan{ 2322169689Skan 2323169689Skan edge pe; 2324169689Skan basic_block new_bb; 2325169689Skan tree stmt, ni_name; 2326169689Skan tree var; 2327169689Skan tree ratio_name; 2328169689Skan tree ratio_mult_vf_name; 2329169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2330169689Skan tree ni = LOOP_VINFO_NITERS (loop_vinfo); 2331169689Skan int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2332169689Skan tree log_vf; 2333169689Skan 2334169689Skan pe = loop_preheader_edge (loop); 2335169689Skan 2336169689Skan /* Generate temporary variable that contains 2337169689Skan number of iterations loop executes. 
*/ 2338169689Skan 2339169689Skan ni_name = vect_build_loop_niters (loop_vinfo); 2340169689Skan log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); 2341169689Skan 2342169689Skan /* Create: ratio = ni >> log2(vf) */ 2343169689Skan 2344169689Skan var = create_tmp_var (TREE_TYPE (ni), "bnd"); 2345169689Skan add_referenced_var (var); 2346169689Skan ratio_name = make_ssa_name (var, NULL_TREE); 2347169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name, 2348169689Skan build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf)); 2349169689Skan SSA_NAME_DEF_STMT (ratio_name) = stmt; 2350169689Skan 2351169689Skan pe = loop_preheader_edge (loop); 2352169689Skan new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2353169689Skan gcc_assert (!new_bb); 2354169689Skan 2355169689Skan /* Create: ratio_mult_vf = ratio << log2 (vf). */ 2356169689Skan 2357169689Skan var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); 2358169689Skan add_referenced_var (var); 2359169689Skan ratio_mult_vf_name = make_ssa_name (var, NULL_TREE); 2360169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, 2361169689Skan build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf)); 2362169689Skan SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; 2363169689Skan 2364169689Skan pe = loop_preheader_edge (loop); 2365169689Skan new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2366169689Skan gcc_assert (!new_bb); 2367169689Skan 2368169689Skan *ni_name_ptr = ni_name; 2369169689Skan *ratio_mult_vf_name_ptr = ratio_mult_vf_name; 2370169689Skan *ratio_name_ptr = ratio_name; 2371169689Skan 2372169689Skan return; 2373169689Skan} 2374169689Skan 2375169689Skan 2376169689Skan/* Function update_vuses_to_preheader. 2377169689Skan 2378169689Skan Input: 2379169689Skan STMT - a statement with potential VUSEs. 2380169689Skan LOOP - the loop whose preheader will contain STMT. 
2381169689Skan 2382169689Skan It's possible to vectorize a loop even though an SSA_NAME from a VUSE 2383169689Skan appears to be defined in a V_MAY_DEF in another statement in a loop. 2384169689Skan One such case is when the VUSE is at the dereference of a __restricted__ 2385169689Skan pointer in a load and the V_MAY_DEF is at the dereference of a different 2386169689Skan __restricted__ pointer in a store. Vectorization may result in 2387169689Skan copy_virtual_uses being called to copy the problematic VUSE to a new 2388169689Skan statement that is being inserted in the loop preheader. This procedure 2389169689Skan is called to change the SSA_NAME in the new statement's VUSE from the 2390169689Skan SSA_NAME updated in the loop to the related SSA_NAME available on the 2391169689Skan path entering the loop. 2392169689Skan 2393169689Skan When this function is called, we have the following situation: 2394169689Skan 2395169689Skan # vuse <name1> 2396169689Skan S1: vload 2397169689Skan do { 2398169689Skan # name1 = phi < name0 , name2> 2399169689Skan 2400169689Skan # vuse <name1> 2401169689Skan S2: vload 2402169689Skan 2403169689Skan # name2 = vdef <name1> 2404169689Skan S3: vstore 2405169689Skan 2406169689Skan }while... 2407169689Skan 2408169689Skan Stmt S1 was created in the loop preheader block as part of misaligned-load 2409169689Skan handling. This function fixes the name of the vuse of S1 from 'name1' to 2410169689Skan 'name0'. 
*/ 2411169689Skan 2412169689Skanstatic void 2413169689Skanupdate_vuses_to_preheader (tree stmt, struct loop *loop) 2414169689Skan{ 2415169689Skan basic_block header_bb = loop->header; 2416169689Skan edge preheader_e = loop_preheader_edge (loop); 2417169689Skan ssa_op_iter iter; 2418169689Skan use_operand_p use_p; 2419169689Skan 2420169689Skan FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE) 2421169689Skan { 2422169689Skan tree ssa_name = USE_FROM_PTR (use_p); 2423169689Skan tree def_stmt = SSA_NAME_DEF_STMT (ssa_name); 2424169689Skan tree name_var = SSA_NAME_VAR (ssa_name); 2425169689Skan basic_block bb = bb_for_stmt (def_stmt); 2426169689Skan 2427169689Skan /* For a use before any definitions, def_stmt is a NOP_EXPR. */ 2428169689Skan if (!IS_EMPTY_STMT (def_stmt) 2429169689Skan && flow_bb_inside_loop_p (loop, bb)) 2430169689Skan { 2431169689Skan /* If the block containing the statement defining the SSA_NAME 2432169689Skan is in the loop then it's necessary to find the definition 2433169689Skan outside the loop using the PHI nodes of the header. */ 2434169689Skan tree phi; 2435169689Skan bool updated = false; 2436169689Skan 2437169689Skan for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi)) 2438169689Skan { 2439169689Skan if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var) 2440169689Skan { 2441169689Skan SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx)); 2442169689Skan updated = true; 2443169689Skan break; 2444169689Skan } 2445169689Skan } 2446169689Skan gcc_assert (updated); 2447169689Skan } 2448169689Skan } 2449169689Skan} 2450169689Skan 2451169689Skan 2452169689Skan/* Function vect_update_ivs_after_vectorizer. 2453169689Skan 2454169689Skan "Advance" the induction variables of LOOP to the value they should take 2455169689Skan after the execution of LOOP. This is currently necessary because the 2456169689Skan vectorizer does not handle induction variables that are used after the 2457169689Skan loop. 
Such a situation occurs when the last iterations of LOOP are 2458169689Skan peeled, because: 2459169689Skan 1. We introduced new uses after LOOP for IVs that were not originally used 2460169689Skan after LOOP: the IVs of LOOP are now used by an epilog loop. 2461169689Skan 2. LOOP is going to be vectorized; this means that it will iterate N/VF 2462169689Skan times, whereas the loop IVs should be bumped N times. 2463169689Skan 2464169689Skan Input: 2465169689Skan - LOOP - a loop that is going to be vectorized. The last few iterations 2466169689Skan of LOOP were peeled. 2467169689Skan - NITERS - the number of iterations that LOOP executes (before it is 2468169689Skan vectorized). i.e, the number of times the ivs should be bumped. 2469169689Skan - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path 2470169689Skan coming out from LOOP on which there are uses of the LOOP ivs 2471169689Skan (this is the path from LOOP->exit to epilog_loop->preheader). 2472169689Skan 2473169689Skan The new definitions of the ivs are placed in LOOP->exit. 2474169689Skan The phi args associated with the edge UPDATE_E in the bb 2475169689Skan UPDATE_E->dest are updated accordingly. 2476169689Skan 2477169689Skan Assumption 1: Like the rest of the vectorizer, this function assumes 2478169689Skan a single loop exit that has a single predecessor. 2479169689Skan 2480169689Skan Assumption 2: The phi nodes in the LOOP header and in update_bb are 2481169689Skan organized in the same order. 2482169689Skan 2483169689Skan Assumption 3: The access function of the ivs is simple enough (see 2484169689Skan vect_can_advance_ivs_p). This assumption will be relaxed in the future. 2485169689Skan 2486169689Skan Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path 2487169689Skan coming out of LOOP on which the ivs of LOOP are used (this is the path 2488169689Skan that leads to the epilog loop; other paths skip the epilog loop). 
This 2489169689Skan path starts with the edge UPDATE_E, and its destination (denoted update_bb) 2490169689Skan needs to have its phis updated. 2491169689Skan */ 2492169689Skan 2493169689Skanstatic void 2494169689Skanvect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, 2495169689Skan edge update_e) 2496169689Skan{ 2497169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2498169689Skan basic_block exit_bb = loop->single_exit->dest; 2499169689Skan tree phi, phi1; 2500169689Skan basic_block update_bb = update_e->dest; 2501169689Skan 2502169689Skan /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */ 2503169689Skan 2504169689Skan /* Make sure there exists a single-predecessor exit bb: */ 2505169689Skan gcc_assert (single_pred_p (exit_bb)); 2506169689Skan 2507169689Skan for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb); 2508169689Skan phi && phi1; 2509169689Skan phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1)) 2510169689Skan { 2511169689Skan tree access_fn = NULL; 2512169689Skan tree evolution_part; 2513169689Skan tree init_expr; 2514169689Skan tree step_expr; 2515169689Skan tree var, stmt, ni, ni_name; 2516169689Skan block_stmt_iterator last_bsi; 2517169689Skan 2518169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2519169689Skan { 2520169689Skan fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: "); 2521169689Skan print_generic_expr (vect_dump, phi, TDF_SLIM); 2522169689Skan } 2523169689Skan 2524169689Skan /* Skip virtual phi's. */ 2525169689Skan if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) 2526169689Skan { 2527169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2528169689Skan fprintf (vect_dump, "virtual phi. skip."); 2529169689Skan continue; 2530169689Skan } 2531169689Skan 2532169689Skan /* Skip reduction phis. 
*/ 2533169689Skan if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) 2534169689Skan { 2535169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2536169689Skan fprintf (vect_dump, "reduc phi. skip."); 2537169689Skan continue; 2538169689Skan } 2539169689Skan 2540169689Skan access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 2541169689Skan gcc_assert (access_fn); 2542169689Skan evolution_part = 2543169689Skan unshare_expr (evolution_part_in_loop_num (access_fn, loop->num)); 2544169689Skan gcc_assert (evolution_part != NULL_TREE); 2545169689Skan 2546169689Skan /* FORNOW: We do not support IVs whose evolution function is a polynomial 2547169689Skan of degree >= 2 or exponential. */ 2548169689Skan gcc_assert (!tree_is_chrec (evolution_part)); 2549169689Skan 2550169689Skan step_expr = evolution_part; 2551169689Skan init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, 2552169689Skan loop->num)); 2553169689Skan 2554169689Skan ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr), 2555169689Skan build2 (MULT_EXPR, TREE_TYPE (niters), 2556169689Skan niters, step_expr), init_expr); 2557169689Skan 2558169689Skan var = create_tmp_var (TREE_TYPE (init_expr), "tmp"); 2559169689Skan add_referenced_var (var); 2560169689Skan 2561169689Skan ni_name = force_gimple_operand (ni, &stmt, false, var); 2562169689Skan 2563169689Skan /* Insert stmt into exit_bb. */ 2564169689Skan last_bsi = bsi_last (exit_bb); 2565169689Skan if (stmt) 2566169689Skan bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT); 2567169689Skan 2568169689Skan /* Fix phi expressions in the successor bb. */ 2569169689Skan SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name); 2570169689Skan } 2571169689Skan} 2572169689Skan 2573169689Skan 2574169689Skan/* Function vect_do_peeling_for_loop_bound 2575169689Skan 2576169689Skan Peel the last iterations of the loop represented by LOOP_VINFO. 2577169689Skan The peeled iterations form a new epilog loop. 
Given that the loop now 2578169689Skan iterates NITERS times, the new epilog loop iterates 2579169689Skan NITERS % VECTORIZATION_FACTOR times. 2580169689Skan 2581169689Skan The original loop will later be made to iterate 2582169689Skan NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */ 2583169689Skan 2584169689Skanstatic void 2585169689Skanvect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, 2586169689Skan struct loops *loops) 2587169689Skan{ 2588169689Skan tree ni_name, ratio_mult_vf_name; 2589169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2590169689Skan struct loop *new_loop; 2591169689Skan edge update_e; 2592169689Skan basic_block preheader; 2593169689Skan int loop_num; 2594169689Skan 2595169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2596169689Skan fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); 2597169689Skan 2598169689Skan initialize_original_copy_tables (); 2599169689Skan 2600169689Skan /* Generate the following variables on the preheader of original loop: 2601169689Skan 2602169689Skan ni_name = number of iteration the original loop executes 2603169689Skan ratio = ni_name / vf 2604169689Skan ratio_mult_vf_name = ratio * vf */ 2605169689Skan vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, 2606169689Skan &ratio_mult_vf_name, ratio); 2607169689Skan 2608169689Skan loop_num = loop->num; 2609169689Skan new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit, 2610169689Skan ratio_mult_vf_name, ni_name, false); 2611169689Skan gcc_assert (new_loop); 2612169689Skan gcc_assert (loop_num == loop->num); 2613169689Skan#ifdef ENABLE_CHECKING 2614169689Skan slpeel_verify_cfg_after_peeling (loop, new_loop); 2615169689Skan#endif 2616169689Skan 2617169689Skan /* A guard that controls whether the new_loop is to be executed or skipped 2618169689Skan is placed in LOOP->exit. 
LOOP->exit therefore has two successors - one 2619169689Skan is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other 2620169689Skan is a bb after NEW_LOOP, where these IVs are not used. Find the edge that 2621169689Skan is on the path where the LOOP IVs are used and need to be updated. */ 2622169689Skan 2623169689Skan preheader = loop_preheader_edge (new_loop)->src; 2624169689Skan if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest) 2625169689Skan update_e = EDGE_PRED (preheader, 0); 2626169689Skan else 2627169689Skan update_e = EDGE_PRED (preheader, 1); 2628169689Skan 2629169689Skan /* Update IVs of original loop as if they were advanced 2630169689Skan by ratio_mult_vf_name steps. */ 2631169689Skan vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); 2632169689Skan 2633169689Skan /* After peeling we have to reset scalar evolution analyzer. */ 2634169689Skan scev_reset (); 2635169689Skan 2636169689Skan free_original_copy_tables (); 2637169689Skan} 2638169689Skan 2639169689Skan 2640169689Skan/* Function vect_gen_niters_for_prolog_loop 2641169689Skan 2642169689Skan Set the number of iterations for the loop represented by LOOP_VINFO 2643169689Skan to the minimum between LOOP_NITERS (the original iteration count of the loop) 2644169689Skan and the misalignment of DR - the data reference recorded in 2645169689Skan LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of 2646169689Skan this loop, the data reference DR will refer to an aligned location. 
2647169689Skan 2648169689Skan The following computation is generated: 2649169689Skan 2650169689Skan If the misalignment of DR is known at compile time: 2651169689Skan addr_mis = int mis = DR_MISALIGNMENT (dr); 2652169689Skan Else, compute address misalignment in bytes: 2653169689Skan addr_mis = addr & (vectype_size - 1) 2654169689Skan 2655169689Skan prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) ) 2656169689Skan 2657169689Skan (elem_size = element type size; an element is the scalar element 2658169689Skan whose type is the inner type of the vectype) */ 2659169689Skan 2660169689Skanstatic tree 2661169689Skanvect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) 2662169689Skan{ 2663169689Skan struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); 2664169689Skan int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2665169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2666169689Skan tree var, stmt; 2667169689Skan tree iters, iters_name; 2668169689Skan edge pe; 2669169689Skan basic_block new_bb; 2670169689Skan tree dr_stmt = DR_STMT (dr); 2671169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); 2672169689Skan tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2673169689Skan int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; 2674169689Skan tree niters_type = TREE_TYPE (loop_niters); 2675169689Skan 2676169689Skan pe = loop_preheader_edge (loop); 2677169689Skan 2678169689Skan if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) 2679169689Skan { 2680169689Skan int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); 2681169689Skan int element_size = vectype_align/vf; 2682169689Skan int elem_misalign = byte_misalign / element_size; 2683169689Skan 2684169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2685169689Skan fprintf (vect_dump, "known alignment = %d.", byte_misalign); 2686169689Skan iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1)); 2687169689Skan } 2688169689Skan else 
2689169689Skan { 2690169689Skan tree new_stmts = NULL_TREE; 2691169689Skan tree start_addr = 2692169689Skan vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE); 2693169689Skan tree ptr_type = TREE_TYPE (start_addr); 2694169689Skan tree size = TYPE_SIZE (ptr_type); 2695169689Skan tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); 2696169689Skan tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); 2697169689Skan tree elem_size_log = 2698169689Skan build_int_cst (type, exact_log2 (vectype_align/vf)); 2699169689Skan tree vf_minus_1 = build_int_cst (type, vf - 1); 2700169689Skan tree vf_tree = build_int_cst (type, vf); 2701169689Skan tree byte_misalign; 2702169689Skan tree elem_misalign; 2703169689Skan 2704169689Skan new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); 2705169689Skan gcc_assert (!new_bb); 2706169689Skan 2707169689Skan /* Create: byte_misalign = addr & (vectype_size - 1) */ 2708169689Skan byte_misalign = 2709169689Skan build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); 2710169689Skan 2711169689Skan /* Create: elem_misalign = byte_misalign / element_size */ 2712169689Skan elem_misalign = 2713169689Skan build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); 2714169689Skan 2715169689Skan /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ 2716169689Skan iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign); 2717169689Skan iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1); 2718169689Skan iters = fold_convert (niters_type, iters); 2719169689Skan } 2720169689Skan 2721169689Skan /* Create: prolog_loop_niters = min (iters, loop_niters) */ 2722169689Skan /* If the loop bound is known at compile time we already verified that it is 2723169689Skan greater than vf; since the misalignment ('iters') is at most vf, there's 2724169689Skan no need to generate the MIN_EXPR in this case. 
*/ 2725169689Skan if (TREE_CODE (loop_niters) != INTEGER_CST) 2726169689Skan iters = build2 (MIN_EXPR, niters_type, iters, loop_niters); 2727169689Skan 2728169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2729169689Skan { 2730169689Skan fprintf (vect_dump, "niters for prolog loop: "); 2731169689Skan print_generic_expr (vect_dump, iters, TDF_SLIM); 2732169689Skan } 2733169689Skan 2734169689Skan var = create_tmp_var (niters_type, "prolog_loop_niters"); 2735169689Skan add_referenced_var (var); 2736169689Skan iters_name = force_gimple_operand (iters, &stmt, false, var); 2737169689Skan 2738169689Skan /* Insert stmt on loop preheader edge. */ 2739169689Skan if (stmt) 2740169689Skan { 2741169689Skan basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2742169689Skan gcc_assert (!new_bb); 2743169689Skan } 2744169689Skan 2745169689Skan return iters_name; 2746169689Skan} 2747169689Skan 2748169689Skan 2749169689Skan/* Function vect_update_init_of_dr 2750169689Skan 2751169689Skan NITERS iterations were peeled from LOOP. DR represents a data reference 2752169689Skan in LOOP. This function updates the information recorded in DR to 2753169689Skan account for the fact that the first NITERS iterations had already been 2754169689Skan executed. Specifically, it updates the OFFSET field of DR. */ 2755169689Skan 2756169689Skanstatic void 2757169689Skanvect_update_init_of_dr (struct data_reference *dr, tree niters) 2758169689Skan{ 2759169689Skan tree offset = DR_OFFSET (dr); 2760169689Skan 2761169689Skan niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr)); 2762169689Skan offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters); 2763169689Skan DR_OFFSET (dr) = offset; 2764169689Skan} 2765169689Skan 2766169689Skan 2767169689Skan/* Function vect_update_inits_of_drs 2768169689Skan 2769169689Skan NITERS iterations were peeled from the loop represented by LOOP_VINFO. 
2770169689Skan This function updates the information recorded for the data references in 2771169689Skan the loop to account for the fact that the first NITERS iterations had 2772169689Skan already been executed. Specifically, it updates the initial_condition of the 2773169689Skan access_function of all the data_references in the loop. */ 2774169689Skan 2775169689Skanstatic void 2776169689Skanvect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) 2777169689Skan{ 2778169689Skan unsigned int i; 2779169689Skan VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 2780169689Skan struct data_reference *dr; 2781169689Skan 2782169689Skan if (vect_dump && (dump_flags & TDF_DETAILS)) 2783169689Skan fprintf (vect_dump, "=== vect_update_inits_of_dr ==="); 2784169689Skan 2785169689Skan for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 2786169689Skan vect_update_init_of_dr (dr, niters); 2787169689Skan} 2788169689Skan 2789169689Skan 2790169689Skan/* Function vect_do_peeling_for_alignment 2791169689Skan 2792169689Skan Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. 2793169689Skan 'niters' is set to the misalignment of one of the data references in the 2794169689Skan loop, thereby forcing it to refer to an aligned location at the beginning 2795169689Skan of the execution of this loop. The data reference for which we are 2796169689Skan peeling is recorded in LOOP_VINFO_UNALIGNED_DR. 
*/ 2797169689Skan 2798169689Skanstatic void 2799169689Skanvect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) 2800169689Skan{ 2801169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2802169689Skan tree niters_of_prolog_loop, ni_name; 2803169689Skan tree n_iters; 2804169689Skan struct loop *new_loop; 2805169689Skan 2806169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2807169689Skan fprintf (vect_dump, "=== vect_do_peeling_for_alignment ==="); 2808169689Skan 2809169689Skan initialize_original_copy_tables (); 2810169689Skan 2811169689Skan ni_name = vect_build_loop_niters (loop_vinfo); 2812169689Skan niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); 2813169689Skan 2814169689Skan /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ 2815169689Skan new_loop = 2816169689Skan slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 2817169689Skan niters_of_prolog_loop, ni_name, true); 2818169689Skan gcc_assert (new_loop); 2819169689Skan#ifdef ENABLE_CHECKING 2820169689Skan slpeel_verify_cfg_after_peeling (new_loop, loop); 2821169689Skan#endif 2822169689Skan 2823169689Skan /* Update number of times loop executes. */ 2824169689Skan n_iters = LOOP_VINFO_NITERS (loop_vinfo); 2825169689Skan LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, 2826169689Skan TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); 2827169689Skan 2828169689Skan /* Update the init conditions of the access functions of all data refs. */ 2829169689Skan vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); 2830169689Skan 2831169689Skan /* After peeling we have to reset scalar evolution analyzer. */ 2832169689Skan scev_reset (); 2833169689Skan 2834169689Skan free_original_copy_tables (); 2835169689Skan} 2836169689Skan 2837169689Skan 2838169689Skan/* Function vect_create_cond_for_align_checks. 
2839169689Skan 2840169689Skan Create a conditional expression that represents the alignment checks for 2841169689Skan all of data references (array element references) whose alignment must be 2842169689Skan checked at runtime. 2843169689Skan 2844169689Skan Input: 2845169689Skan LOOP_VINFO - two fields of the loop information are used. 2846169689Skan LOOP_VINFO_PTR_MASK is the mask used to check the alignment. 2847169689Skan LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked. 2848169689Skan 2849169689Skan Output: 2850169689Skan COND_EXPR_STMT_LIST - statements needed to construct the conditional 2851169689Skan expression. 2852169689Skan The returned value is the conditional expression to be used in the if 2853169689Skan statement that controls which version of the loop gets executed at runtime. 2854169689Skan 2855169689Skan The algorithm makes two assumptions: 2856169689Skan 1) The number of bytes "n" in a vector is a power of 2. 2857169689Skan 2) An address "a" is aligned if a%n is zero and that this 2858169689Skan test can be done as a&(n-1) == 0. For example, for 16 2859169689Skan byte vectors the test is a&0xf == 0. */ 2860169689Skan 2861169689Skanstatic tree 2862169689Skanvect_create_cond_for_align_checks (loop_vec_info loop_vinfo, 2863169689Skan tree *cond_expr_stmt_list) 2864169689Skan{ 2865169689Skan VEC(tree,heap) *may_misalign_stmts 2866169689Skan = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); 2867169689Skan tree ref_stmt; 2868169689Skan int mask = LOOP_VINFO_PTR_MASK (loop_vinfo); 2869169689Skan tree mask_cst; 2870169689Skan unsigned int i; 2871169689Skan tree psize; 2872169689Skan tree int_ptrsize_type; 2873169689Skan char tmp_name[20]; 2874169689Skan tree or_tmp_name = NULL_TREE; 2875169689Skan tree and_tmp, and_tmp_name, and_stmt; 2876169689Skan tree ptrsize_zero; 2877169689Skan 2878169689Skan /* Check that mask is one less than a power of 2, i.e., mask is 2879169689Skan all zeros followed by all ones. 
*/ 2880169689Skan gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0)); 2881169689Skan 2882169689Skan /* CHECKME: what is the best integer or unsigned type to use to hold a 2883169689Skan cast from a pointer value? */ 2884169689Skan psize = TYPE_SIZE (ptr_type_node); 2885169689Skan int_ptrsize_type 2886169689Skan = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0); 2887169689Skan 2888169689Skan /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address 2889169689Skan of the first vector of the i'th data reference. */ 2890169689Skan 2891169689Skan for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++) 2892169689Skan { 2893169689Skan tree new_stmt_list = NULL_TREE; 2894169689Skan tree addr_base; 2895169689Skan tree addr_tmp, addr_tmp_name, addr_stmt; 2896169689Skan tree or_tmp, new_or_tmp_name, or_stmt; 2897169689Skan 2898169689Skan /* create: addr_tmp = (int)(address_of_first_vector) */ 2899169689Skan addr_base = vect_create_addr_base_for_vector_ref (ref_stmt, 2900169689Skan &new_stmt_list, 2901169689Skan NULL_TREE); 2902169689Skan 2903169689Skan if (new_stmt_list != NULL_TREE) 2904169689Skan append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list); 2905169689Skan 2906169689Skan sprintf (tmp_name, "%s%d", "addr2int", i); 2907169689Skan addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name); 2908169689Skan add_referenced_var (addr_tmp); 2909169689Skan addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE); 2910169689Skan addr_stmt = fold_convert (int_ptrsize_type, addr_base); 2911169689Skan addr_stmt = build2 (MODIFY_EXPR, void_type_node, 2912169689Skan addr_tmp_name, addr_stmt); 2913169689Skan SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt; 2914169689Skan append_to_statement_list_force (addr_stmt, cond_expr_stmt_list); 2915169689Skan 2916169689Skan /* The addresses are OR together. 
*/ 2917169689Skan 2918169689Skan if (or_tmp_name != NULL_TREE) 2919169689Skan { 2920169689Skan /* create: or_tmp = or_tmp | addr_tmp */ 2921169689Skan sprintf (tmp_name, "%s%d", "orptrs", i); 2922169689Skan or_tmp = create_tmp_var (int_ptrsize_type, tmp_name); 2923169689Skan add_referenced_var (or_tmp); 2924169689Skan new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE); 2925169689Skan or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name, 2926169689Skan build2 (BIT_IOR_EXPR, int_ptrsize_type, 2927169689Skan or_tmp_name, 2928169689Skan addr_tmp_name)); 2929169689Skan SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt; 2930169689Skan append_to_statement_list_force (or_stmt, cond_expr_stmt_list); 2931169689Skan or_tmp_name = new_or_tmp_name; 2932169689Skan } 2933169689Skan else 2934169689Skan or_tmp_name = addr_tmp_name; 2935169689Skan 2936169689Skan } /* end for i */ 2937169689Skan 2938169689Skan mask_cst = build_int_cst (int_ptrsize_type, mask); 2939169689Skan 2940169689Skan /* create: and_tmp = or_tmp & mask */ 2941169689Skan and_tmp = create_tmp_var (int_ptrsize_type, "andmask" ); 2942169689Skan add_referenced_var (and_tmp); 2943169689Skan and_tmp_name = make_ssa_name (and_tmp, NULL_TREE); 2944169689Skan 2945169689Skan and_stmt = build2 (MODIFY_EXPR, void_type_node, 2946169689Skan and_tmp_name, 2947169689Skan build2 (BIT_AND_EXPR, int_ptrsize_type, 2948169689Skan or_tmp_name, mask_cst)); 2949169689Skan SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt; 2950169689Skan append_to_statement_list_force (and_stmt, cond_expr_stmt_list); 2951169689Skan 2952169689Skan /* Make and_tmp the left operand of the conditional test against zero. 2953169689Skan if and_tmp has a nonzero bit then some address is unaligned. */ 2954169689Skan ptrsize_zero = build_int_cst (int_ptrsize_type, 0); 2955169689Skan return build2 (EQ_EXPR, boolean_type_node, 2956169689Skan and_tmp_name, ptrsize_zero); 2957169689Skan} 2958169689Skan 2959169689Skan 2960169689Skan/* Function vect_transform_loop. 
2961169689Skan 2962169689Skan The analysis phase has determined that the loop is vectorizable. 2963169689Skan Vectorize the loop - created vectorized stmts to replace the scalar 2964169689Skan stmts in the loop, and update the loop exit condition. */ 2965169689Skan 2966169689Skanvoid 2967169689Skanvect_transform_loop (loop_vec_info loop_vinfo, 2968169689Skan struct loops *loops ATTRIBUTE_UNUSED) 2969169689Skan{ 2970169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2971169689Skan basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 2972169689Skan int nbbs = loop->num_nodes; 2973169689Skan block_stmt_iterator si; 2974169689Skan int i; 2975169689Skan tree ratio = NULL; 2976169689Skan int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2977169689Skan bitmap_iterator bi; 2978169689Skan unsigned int j; 2979169689Skan 2980169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2981169689Skan fprintf (vect_dump, "=== vec_transform_loop ==="); 2982169689Skan 2983169689Skan /* If the loop has data references that may or may not be aligned then 2984169689Skan two versions of the loop need to be generated, one which is vectorized 2985169689Skan and one which isn't. A test is then generated to control which of the 2986169689Skan loops is executed. The test checks for the alignment of all of the 2987169689Skan data references that may or may not be aligned. 
*/ 2988169689Skan 2989169689Skan if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) 2990169689Skan { 2991169689Skan struct loop *nloop; 2992169689Skan tree cond_expr; 2993169689Skan tree cond_expr_stmt_list = NULL_TREE; 2994169689Skan basic_block condition_bb; 2995169689Skan block_stmt_iterator cond_exp_bsi; 2996169689Skan basic_block merge_bb; 2997169689Skan basic_block new_exit_bb; 2998169689Skan edge new_exit_e, e; 2999169689Skan tree orig_phi, new_phi, arg; 3000169689Skan 3001169689Skan cond_expr = vect_create_cond_for_align_checks (loop_vinfo, 3002169689Skan &cond_expr_stmt_list); 3003169689Skan initialize_original_copy_tables (); 3004169689Skan nloop = loop_version (loops, loop, cond_expr, &condition_bb, true); 3005169689Skan free_original_copy_tables(); 3006169689Skan 3007169689Skan /** Loop versioning violates an assumption we try to maintain during 3008169689Skan vectorization - that the loop exit block has a single predecessor. 3009169689Skan After versioning, the exit block of both loop versions is the same 3010169689Skan basic block (i.e. it has two predecessors). Just in order to simplify 3011169689Skan following transformations in the vectorizer, we fix this situation 3012169689Skan here by adding a new (empty) block on the exit-edge of the loop, 3013169689Skan with the proper loop-exit phis to maintain loop-closed-form. 
**/ 3014169689Skan 3015169689Skan merge_bb = loop->single_exit->dest; 3016169689Skan gcc_assert (EDGE_COUNT (merge_bb->preds) == 2); 3017169689Skan new_exit_bb = split_edge (loop->single_exit); 3018169689Skan add_bb_to_loop (new_exit_bb, loop->outer); 3019169689Skan new_exit_e = loop->single_exit; 3020169689Skan e = EDGE_SUCC (new_exit_bb, 0); 3021169689Skan 3022169689Skan for (orig_phi = phi_nodes (merge_bb); orig_phi; 3023169689Skan orig_phi = PHI_CHAIN (orig_phi)) 3024169689Skan { 3025169689Skan new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 3026169689Skan new_exit_bb); 3027169689Skan arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); 3028169689Skan add_phi_arg (new_phi, arg, new_exit_e); 3029169689Skan SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi)); 3030169689Skan } 3031169689Skan 3032169689Skan /** end loop-exit-fixes after versioning **/ 3033169689Skan 3034169689Skan update_ssa (TODO_update_ssa); 3035169689Skan cond_exp_bsi = bsi_last (condition_bb); 3036169689Skan bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT); 3037169689Skan } 3038169689Skan 3039169689Skan /* CHECKME: we wouldn't need this if we called update_ssa once 3040169689Skan for all loops. */ 3041169689Skan bitmap_zero (vect_vnames_to_rename); 3042169689Skan 3043169689Skan /* Peel the loop if there are data refs with unknown alignment. 3044169689Skan Only one data ref with unknown store is allowed. */ 3045169689Skan 3046169689Skan if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) 3047169689Skan vect_do_peeling_for_alignment (loop_vinfo, loops); 3048169689Skan 3049169689Skan /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a 3050169689Skan compile time constant), or it is a constant that doesn't divide by the 3051169689Skan vectorization factor, then an epilog loop needs to be created. 3052169689Skan We therefore duplicate the loop: the original loop will be vectorized, 3053169689Skan and will compute the first (n/VF) iterations. 
The second copy of the loop 3054169689Skan will remain scalar and will compute the remaining (n%VF) iterations. 3055169689Skan (VF is the vectorization factor). */ 3056169689Skan 3057169689Skan if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 3058169689Skan || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 3059169689Skan && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)) 3060169689Skan vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops); 3061169689Skan else 3062169689Skan ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), 3063169689Skan LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); 3064169689Skan 3065169689Skan /* 1) Make sure the loop header has exactly two entries 3066169689Skan 2) Make sure we have a preheader basic block. */ 3067169689Skan 3068169689Skan gcc_assert (EDGE_COUNT (loop->header->preds) == 2); 3069169689Skan 3070169689Skan loop_split_edge_with (loop_preheader_edge (loop), NULL); 3071169689Skan 3072169689Skan 3073169689Skan /* FORNOW: the vectorizer supports only loops which body consist 3074169689Skan of one basic block (header + empty latch). When the vectorizer will 3075169689Skan support more involved loop forms, the order by which the BBs are 3076169689Skan traversed need to be reconsidered. 
*/ 3077169689Skan 3078169689Skan for (i = 0; i < nbbs; i++) 3079169689Skan { 3080169689Skan basic_block bb = bbs[i]; 3081169689Skan 3082169689Skan for (si = bsi_start (bb); !bsi_end_p (si);) 3083169689Skan { 3084169689Skan tree stmt = bsi_stmt (si); 3085169689Skan stmt_vec_info stmt_info; 3086169689Skan bool is_store; 3087169689Skan 3088169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 3089169689Skan { 3090169689Skan fprintf (vect_dump, "------>vectorizing statement: "); 3091169689Skan print_generic_expr (vect_dump, stmt, TDF_SLIM); 3092169689Skan } 3093169689Skan stmt_info = vinfo_for_stmt (stmt); 3094169689Skan gcc_assert (stmt_info); 3095169689Skan if (!STMT_VINFO_RELEVANT_P (stmt_info) 3096169689Skan && !STMT_VINFO_LIVE_P (stmt_info)) 3097169689Skan { 3098169689Skan bsi_next (&si); 3099169689Skan continue; 3100169689Skan } 3101169689Skan /* FORNOW: Verify that all stmts operate on the same number of 3102169689Skan units and no inner unrolling is necessary. */ 3103169689Skan gcc_assert 3104169689Skan (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) 3105169689Skan == (unsigned HOST_WIDE_INT) vectorization_factor); 3106169689Skan 3107169689Skan /* -------- vectorize statement ------------ */ 3108169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 3109169689Skan fprintf (vect_dump, "transform statement."); 3110169689Skan 3111169689Skan is_store = vect_transform_stmt (stmt, &si); 3112169689Skan if (is_store) 3113169689Skan { 3114169689Skan /* Free the attached stmt_vec_info and remove the stmt. 
*/ 3115169689Skan stmt_ann_t ann = stmt_ann (stmt); 3116169689Skan free (stmt_info); 3117169689Skan set_stmt_info (ann, NULL); 3118169689Skan bsi_remove (&si, true); 3119169689Skan continue; 3120169689Skan } 3121169689Skan 3122169689Skan bsi_next (&si); 3123169689Skan } /* stmts in BB */ 3124169689Skan } /* BBs in loop */ 3125169689Skan 3126169689Skan slpeel_make_loop_iterate_ntimes (loop, ratio); 3127169689Skan 3128169689Skan EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi) 3129169689Skan mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j))); 3130169689Skan 3131169689Skan /* The memory tags and pointers in vectorized statements need to 3132169689Skan have their SSA forms updated. FIXME, why can't this be delayed 3133169689Skan until all the loops have been transformed? */ 3134169689Skan update_ssa (TODO_update_ssa); 3135169689Skan 3136169689Skan if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) 3137169689Skan fprintf (vect_dump, "LOOP VECTORIZED."); 3138169689Skan} 3139