/* Subroutines used to remove unnecessary doubleword swaps
   for p8 little-endian VSX code.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "df.h"
#include "tm_p.h"
#include "ira.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "tree-pass.h"
#include "rtx-vector-builder.h"

/* Analyze vector computations and remove unnecessary doubleword
   swaps (xxswapdi instructions).  This pass is performed only
   for little-endian VSX code generation.

   For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lxvd2x and
   stxvd2x instructions, which invert the order of doublewords in
   a vector register.  Thus the code generation inserts an xxswapdi
   after each such load, and prior to each such store.  (For spill
   code after register assignment, an additional xxswapdi is inserted
   following each store in order to return a hard register to its
   unpermuted value.)

   The extra xxswapdi instructions reduce performance.  This can be
   particularly bad for vectorized code.  The purpose of this pass
   is to reduce the number of xxswapdi instructions required for
   correctness.

   The primary insight is that much code that operates on vectors
   does not care about the relative order of elements in a register,
   so long as the correct memory order is preserved.  If we have
   a computation where all input values are provided by lxvd2x/xxswapdi
   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
   and all intermediate computations are pure SIMD (independent of
   element order), then all the xxswapdi's associated with the loads
   and stores may be removed.

   This pass uses some of the infrastructure and logical ideas from
   the "web" pass in web.cc.  We create maximal webs of computations
   fitting the description above using union-find.  Each such web is
   then optimized by removing its unnecessary xxswapdi instructions.

   The pass is placed prior to global optimization so that we can
   perform the optimization in the safest and simplest way possible;
   that is, by replacing each xxswapdi insn with a register copy insn.
   Subsequent forward propagation will remove copies where possible.

   There are some operations sensitive to element order for which we
   can still allow the operation, provided we modify those operations.
   These include CONST_VECTORs, for which we must swap the first and
   second halves of the constant vector; and SUBREGs, for which we
   must adjust the byte offset to account for the swapped doublewords.
   A remaining opportunity would be non-immediate-form splats, for
   which we should adjust the selected lane of the input.  We should
   also make code generation adjustments for sum-across operations,
   since this is a common vectorizer reduction.

   Because we run prior to the first split, we can see loads and stores
   here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
   vector loads and stores that have not yet been split into a permuting
   load/store and a swap.  (One way this can happen is with a builtin
   call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
   than deleting a swap, we convert the load/store into a permuting
   load/store (which effectively removes the swap).  */

/* Notes on Permutes

   We do not currently handle computations that contain permutes.  There
   is a general transformation that can be performed correctly, but it
   may introduce more expensive code than it replaces.  To handle these
   would require a cost model to determine when to perform the optimization.
   This commentary records how this could be done if desired.

   The most general permute is something like this (example for V16QI):

     (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
                       (parallel [(const_int a0) (const_int a1)
                                    ...
                                  (const_int a14) (const_int a15)]))

   where a0,...,a15 are in [0,31] and select elements from op1 and op2
   to produce the result.

   Regardless of mode, we can convert the PARALLEL to a mask of 16
   byte-element selectors.  Let's call this M, with M[i] representing
   the ith byte-element selector value.  Then if we swap doublewords
   throughout the computation, we can get correct behavior by replacing
   M with M' as follows:

     M'[i] = { (M[i]+8)%16         : M[i] in [0,15]
             { ((M[i]+8)%16)+16    : M[i] in [16,31]

   This seems promising at first, since we are just replacing one mask
   with another.  But certain masks are preferable to others.  If M
   is a mask that matches a vmrghh pattern, for example, M' certainly
   will not.  Instead of a single vmrghh, we would generate a load of
   M' and a vperm.  So we would need to know how many xxswapdi's we can
   remove as a result of this transformation to determine if it's
   profitable; and preferably the logic would need to be aware of all
   the special preferable masks.

   Another form of permute is an UNSPEC_VPERM, in which the mask is
   already in a register.  In some cases, this mask may be a constant
   that we can discover with ud-chains, in which case the above
   transformation is ok.  However, the common usage here is for the
   mask to be produced by an UNSPEC_LVSL, in which case the mask
   cannot be known at compile time.  In such a case we would have to
   generate several instructions to compute M' as above at run time,
   and a cost model is needed again.

   However, when the mask M for an UNSPEC_VPERM is loaded from the
   constant pool, we can replace M with M' as above at no cost
   beyond adding a constant pool entry.  */
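/* As a worked example of the M -> M' rule above (illustrative only,
   not taken from generated code): a rotate-left-by-one-byte permute
   of two V16QI operands uses the mask

     M  = {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16 }

   Applying the rule elementwise ((M[i]+8)%16 for selectors in [0,15],
   ((M[i]+8)%16)+16 for selectors in [16,31]) gives

     M' = {  9, 10, 11, 12, 13, 14, 15,  0,  1,  2,  3,  4,  5,  6,  7, 24 }

   which matches no special-purpose permute pattern and would have to
   be materialized from the constant pool.  */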
/* This is based on the union-find logic in web.cc.  web_entry_base is
   defined in df.h.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Undefined if
     is_swappable is not set.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};

enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};

/* Union INSN with all insns containing definitions that reach USE.
   Detect whether USE is live-in to the current function.  */
static void
union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
{
  struct df_link *link = DF_REF_CHAIN (use);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_in = 1;

  while (link)
    {
      if (DF_REF_IS_ARTIFICIAL (link->ref))
	insn_entry[INSN_UID (insn)].is_live_in = 1;

      if (DF_REF_INSN_INFO (link->ref))
	{
	  rtx def_insn = DF_REF_INSN (link->ref);
	  (void)unionfind_union (insn_entry + INSN_UID (insn),
				 insn_entry + INSN_UID (def_insn));
	}

      link = link->next;
    }
}

/* Union INSN with all insns containing uses reached from DEF.
   Detect whether DEF is live-out from the current function.  */
static void
union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
{
  struct df_link *link = DF_REF_CHAIN (def);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_out = 1;

  while (link)
    {
      /* This could be an eh use or some other artificial use;
	 we treat these all the same (killing the optimization).  */
      if (DF_REF_IS_ARTIFICIAL (link->ref))
	insn_entry[INSN_UID (insn)].is_live_out = 1;

      if (DF_REF_INSN_INFO (link->ref))
	{
	  rtx use_insn = DF_REF_INSN (link->ref);
	  (void)unionfind_union (insn_entry + INSN_UID (insn),
				 insn_entry + INSN_UID (use_insn));
	}

      link = link->next;
    }
}

/* Return true iff PAT (a SINGLE_SET) is a 64-bit rotate expression;
   else return false.  */
static bool
pattern_is_rotate64 (rtx pat)
{
  rtx rot = SET_SRC (pat);

  if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1))
      && INTVAL (XEXP (rot, 1)) == 64)
    return true;

  return false;
}
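/* For illustration (not taken from the sources): after splitting, a
   register-register doubleword swap of a V4SI value appears as

     (set (reg:V4SI x) (vec_select:V4SI (reg:V4SI y)
                                        (parallel [(const_int 2)
                                                   (const_int 3)
                                                   (const_int 0)
                                                   (const_int 1)])))

   which is the shape insn_is_swap_p below recognizes; the same swap
   can also appear as a rotate by 64 of the 128-bit value (e.g. in
   V1TImode), matched by pattern_is_rotate64 above.  */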
/* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */
static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      if (MEM_P (SET_SRC (body)))
	return 1;

      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
	  && MEM_P (XEXP (SET_SRC (body), 0)))
	return 1;

      if (pattern_is_rotate64 (body) && MEM_P (XEXP (SET_SRC (body), 0)))
	return 1;

      return 0;
    }

  if (GET_CODE (body) != PARALLEL)
    return 0;

  rtx set = XVECEXP (body, 0, 0);

  if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
    return 1;

  return 0;
}

/* Return 1 iff INSN is a store insn, including permuting stores that
   represent an stxvd2x instruction; else return 0.  */
static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
    return 1;
  if (GET_CODE (body) != PARALLEL)
    return 0;
  rtx set = XVECEXP (body, 0, 0);
  if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
    return 1;
  return 0;
}

/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
   a permuting load, or a permuting store.  */
static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;
  rtx rhs = SET_SRC (body);
  if (pattern_is_rotate64 (body))
    return 1;
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;
  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;
  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;
  for (unsigned int i = 0; i < len / 2; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
	return 0;
    }
  for (unsigned int i = len / 2; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
	return 0;
    }
  return 1;
}

/* Return true iff EXPR represents the sum of two registers.  */
bool
rs6000_sum_of_two_registers_p (const_rtx expr)
{
  if (GET_CODE (expr) == PLUS)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      return (REG_P (operand1) && REG_P (operand2));
    }
  return false;
}

/* Return true iff EXPR represents an address expression that masks off
   the low-order 4 bits in the style of an lvx or stvx rtl pattern.  */
bool
rs6000_quadword_masked_address_p (const_rtx expr)
{
  if (GET_CODE (expr) == AND)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
	  && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
	return true;
    }
  return false;
}
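/* A sketch of the address shapes the predicate above accepts (for
   illustration; the lvx/stvx patterns in altivec.md are the real
   reference):

     (and:DI (reg:DI base) (const_int -16))
     (and:DI (plus:DI (reg:DI base) (reg:DI index)) (const_int -16))

   i.e. a register or register+register address with the low-order
   4 bits cleared, matching lvx/stvx's implicit 16-byte alignment.  */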
/* Return TRUE if INSN represents a swap of a swapped load from memory
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);

  /* If there is no definition or the definition is artificial or there are
     multiple definitions, punt.  */
  if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
      || def_link->next)
    return false;

  rtx def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);
  /* We're looking for a load-with-swap insn.  If this is not that,
     return false.  */
  if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
    return false;

  /* If the source of the rtl def is not a set from memory, return
     false.  */
  rtx body = PATTERN (def_insn);
  if (GET_CODE (body) != SET
      || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
	   || pattern_is_rotate64 (body))
      || !MEM_P (XEXP (SET_SRC (body), 0)))
    return false;

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);
  return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
	  && MEM_ALIGN (mem) >= 128);
}

/* Return TRUE if INSN represents a store-with-swap of a swapped value
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
    return false;

  rtx body = PATTERN (insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);

  /* If the base address for the memory expression is not represented
     by a single register and is not the sum of two registers, punt.  */
  if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
    return false;

  /* Confirm that the value to be stored is produced by a swap
     instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
	 then skip it.  I am interested in a different definition.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
	continue;

      /* If there is no def or the def is artificial or there are
	 multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
	  || def_link->next)
	return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);

      /* If this source value is not a simple swap, return false.  */
      if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
	  || insn_entry[uid2].is_store)
	return false;

      /* I've processed the use that I care about, so break out of
	 this loop.  */
      break;
    }

  /* At this point, we know the source data comes from a swap.  The
     remaining question is whether the memory address is aligned.  */
  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      if (MEM_P (dest))
	return (MEM_ALIGN (dest) >= 128);
    }
  return false;
}
/* Return 1 iff UID, known to reference a swap, is both fed by a load
   and a feeder of a store.  */
static unsigned int
swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
{
  rtx insn = insn_entry->insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref def, use;
  struct df_link *link = 0;
  rtx_insn *load = 0, *store = 0;
  bool fed_by_load = 0;
  bool feeds_store = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      link = DF_REF_CHAIN (use);
      load = DF_REF_INSN (link->ref);
      if (insn_is_load_p (load) && insn_is_swap_p (load))
	fed_by_load = 1;
    }

  FOR_EACH_INSN_INFO_DEF (def, insn_info)
    {
      link = DF_REF_CHAIN (def);
      store = DF_REF_INSN (link->ref);
      if (insn_is_store_p (store) && insn_is_swap_p (store))
	feeds_store = 1;
    }

  return fed_by_load && feeds_store;
}

/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  const_rtx tocrel_base;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;

  /* Iterate over the definitions that are used by this insn.  Since
     this is known to be a swap insn, expect only one used definition.  */
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If there is no def or the def is artificial or there are
	 multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
	  || def_link->next)
	return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      /* If this is not a load or is not a swap, return false.  */
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
	return false;

      /* If the source of the rtl def is not a set from memory, return
	 false.  */
      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
	  || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
	       || pattern_is_rotate64 (body))
	  || !MEM_P (XEXP (SET_SRC (body), 0)))
	return false;

      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);
      /* If the base address for the memory expression is not
	 represented by a register, punt.  */
      if (!REG_P (base_reg))
	return false;

      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
	{
	  /* If base_use does not represent base_reg, look for another
	     use.  */
	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	    continue;

	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
	  if (!base_def_link || base_def_link->next)
	    return false;

	  /* Constants held on the stack are not "true" constants
	     because their values are not part of the static load
	     image.  If this constant's base reference is a stack
	     or frame pointer, it is seen as an artificial
	     reference.  */
	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	    return false;

	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
	  rtx tocrel_body = PATTERN (tocrel_insn);
	  rtx base, offset;
	  if (GET_CODE (tocrel_body) != SET)
	    return false;
	  /* There is an extra level of indirection for small/large
	     code models.  */
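	  /* (Illustrative note: under the small/large code models the
	     constant's address is itself loaded from memory, so the
	     feeding insn's source is a MEM wrapping the TOC-relative
	     expression rather than the expression itself; the MEM_P
	     test below strips off that extra level before the
	     toc_relative_expr_p check.)  */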
	  rtx tocrel_expr = SET_SRC (tocrel_body);
	  if (MEM_P (tocrel_expr))
	    tocrel_expr = XEXP (tocrel_expr, 0);
	  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
	    return false;
	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);

	  if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
	    return false;
	  else
	    {
	      /* FIXME: The conditions under which
		   (SYMBOL_REF_P (const_vector)
		    && !CONSTANT_POOL_ADDRESS_P (const_vector))
		 are not well understood.  This code prevents
		 an internal compiler error which will occur in
		 replace_swapped_load_constant () if we were to return
		 true.  Some day, we should figure out how to properly
		 handle this condition in
		 replace_swapped_load_constant () and then we can
		 remove this special test.  */
	      rtx const_vector = get_pool_constant (base);
	      if (SYMBOL_REF_P (const_vector)
		  && CONSTANT_POOL_ADDRESS_P (const_vector))
		const_vector = get_pool_constant (const_vector);
	      if (GET_CODE (const_vector) != CONST_VECTOR)
		return false;
	    }
	}
    }
  return true;
}

/* Return TRUE iff OP matches a V2DF reduction pattern.  See the
   definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
static bool
v2df_reduction_p (rtx op)
{
  if (GET_MODE (op) != V2DFmode)
    return false;

  enum rtx_code code = GET_CODE (op);
  if (code != PLUS && code != SMIN && code != SMAX)
    return false;

  rtx concat = XEXP (op, 0);
  if (GET_CODE (concat) != VEC_CONCAT)
    return false;

  rtx select0 = XEXP (concat, 0);
  rtx select1 = XEXP (concat, 1);
  if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
    return false;

  rtx reg0 = XEXP (select0, 0);
  rtx reg1 = XEXP (select1, 0);
  if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
    return false;

  rtx parallel0 = XEXP (select0, 1);
  rtx parallel1 = XEXP (select1, 1);
  if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
    return false;

  if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
      || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
    return false;

  return true;
}
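/* A sketch (for illustration; the reduction patterns in vsx.md are the
   authoritative shape) of the RTL v2df_reduction_p accepts, here for a
   plus reduction over register x:

     (plus:V2DF (vec_concat:V2DF
                  (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
                  (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
                ...)

   i.e. the value combined with its own doubleword-swapped self, which
   yields the same result regardless of element order.  */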
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  */
static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;
  rtx parallel;

  switch (code)
    {
    case LABEL_REF:
    case SYMBOL_REF:
    case CLOBBER:
    case REG:
      return 1;

    case VEC_CONCAT:
    case ASM_INPUT:
    case ASM_OPERANDS:
      return 0;

    case CONST_VECTOR:
      {
	*special = SH_CONST_VECTOR;
	return 1;
      }

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
	 it represents a vector splat for which we can do special
	 handling.  */
      if (CONST_INT_P (XEXP (op, 0)))
	return 1;
      else if (REG_P (XEXP (op, 0))
	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
	/* This catches V2DF and V2DI splat, at a minimum.  */
	return 1;
      else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
	       && REG_P (XEXP (XEXP (op, 0), 0))
	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
	/* This catches splat of a truncated value.  */
	return 1;
      else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
	/* If the duplicated item is from a select, defer to the select
	   processing to see if we can change the lane for the splat.  */
	return rtx_is_swappable_p (XEXP (op, 0), special);
      else
	return 0;

    case VEC_SELECT:
      /* A vec_extract operation is ok if we change the lane.  */
      if (REG_P (XEXP (op, 0))
	  && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
	  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
	  && XVECLEN (parallel, 0) == 1
	  && CONST_INT_P (XVECEXP (parallel, 0, 0)))
	{
	  *special = SH_EXTRACT;
	  return 1;
	}
      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
	 XXPERMDI is a swap operation, it will be identified by
	 insn_is_swap_p and therefore we won't get here.  */
      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
		   || GET_MODE (XEXP (op, 0)) == V4DImode)
	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
	       && XVECLEN (parallel, 0) == 2
	       && CONST_INT_P (XVECEXP (parallel, 0, 0))
	       && CONST_INT_P (XVECEXP (parallel, 0, 1)))
	{
	  *special = SH_XXPERMDI;
	  return 1;
	}
      else if (v2df_reduction_p (op))
	return 1;
      else
	return 0;

    case UNSPEC:
      {
	/* Various operations are unsafe for this optimization, at least
	   without significant additional work.  Permutes are obviously
	   problematic, as both the permute control vector and the ordering
	   of the target values are invalidated by doubleword swapping.
	   Vector pack and unpack modify the number of vector lanes.
	   Merge-high/low will not operate correctly on swapped operands.
	   Vector shifts across element boundaries are clearly uncool,
	   as are vector select and concatenate operations.  Vector
	   sum-across instructions define one operand with a specific
	   order-dependent element, so additional fixup code would be
	   needed to make those work.  Vector set and non-immediate-form
	   vector splat are element-order sensitive.  A few of these
	   cases might be workable with special handling if required.
	   Adding cost modeling would be appropriate in some cases.  */
	int val = XINT (op, 1);
	switch (val)
	  {
	  default:
	    break;
	  case UNSPEC_VBPERMQ:
	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
	  case UNSPEC_VPACK_SIGN_UNS_SAT:
	  case UNSPEC_VPACK_UNS_UNS_MOD:
	  case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
	  case UNSPEC_VPACK_UNS_UNS_SAT:
	  case UNSPEC_VPERM:
	  case UNSPEC_VPERM_UNS:
	  case UNSPEC_VPERMHI:
	  case UNSPEC_VPERMSI:
	  case UNSPEC_VPERMXOR:
	  case UNSPEC_VPKPX:
	  case UNSPEC_VSLDOI:
	  case UNSPEC_VSLO:
	  case UNSPEC_VSRO:
	  case UNSPEC_VSUM2SWS:
	  case UNSPEC_VSUM4S:
	  case UNSPEC_VSUM4UBS:
	  case UNSPEC_VSUMSWS:
	  case UNSPEC_VSUMSWS_DIRECT:
	  case UNSPEC_VSX_CONCAT:
	  case UNSPEC_VSX_CVDPSPN:
	  case UNSPEC_VSX_CVSPDP:
	  case UNSPEC_VSX_CVSPDPN:
	  case UNSPEC_VSX_EXTRACT:
	  case UNSPEC_VSX_SET:
	  case UNSPEC_VSX_SLDWI:
	  case UNSPEC_VSX_VSLO:
	  case UNSPEC_VUNPACK_HI_SIGN:
	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
	  case UNSPEC_VUNPACK_LO_SIGN:
	  case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
	  case UNSPEC_VUPKHPX:
	  case UNSPEC_VUPKHS_V4SF:
	  case UNSPEC_VUPKHU_V4SF:
	  case UNSPEC_VUPKLPX:
	  case UNSPEC_VUPKLS_V4SF:
	  case UNSPEC_VUPKLU_V4SF:
	    return 0;
	  case UNSPEC_VSPLT_DIRECT:
	  case UNSPEC_VSX_XXSPLTD:
	    *special = SH_SPLAT;
	    return 1;
	  case UNSPEC_REDUC_PLUS:
	  case UNSPEC_REDUC:
	    return 1;
	  case UNSPEC_VPMSUM:
	    /* vpmsumd is not swappable, but vpmsum[bhw] are.  */
	    if (GET_MODE (op) == V2DImode)
	      return 0;
	    break;
	  }
      }

    default:
      break;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
	unsigned int special_op = SH_NONE;
	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
	if (special_op == SH_NONE)
	  continue;
	/* Ensure we never have two kinds of special handling
	   for the same insn.  */
	if (*special != SH_NONE && *special != special_op)
	  return 0;
	*special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	{
	  unsigned int special_op = SH_NONE;
	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
	  if (special_op == SH_NONE)
	    continue;
	  /* Ensure we never have two kinds of special handling
	     for the same insn.  */
	  if (*special != SH_NONE && *special != special_op)
	    return 0;
	  *special = special_op;
	}

  return ok;
}

/* Return 1 iff INSN is an insn that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */
static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
		     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exceptions:
     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
     for the SET source.  Also we must now make an exception for lvx
     and stvx when they are not in the UNSPEC_LVX/STVX form (with the
     explicit "& -16") since this leads to unrecognizable insns.  */
  rtx body = PATTERN (insn);
  int i = INSN_UID (insn);

  if (insn_entry[i].is_load)
    {
      if (GET_CODE (body) == SET)
	{
	  rtx rhs = SET_SRC (body);
	  /* Even without a swap, the RHS might be a vec_select for, say,
	     a byte-reversing load.  */
	  if (!MEM_P (rhs))
	    return 0;
	  if (GET_CODE (XEXP (rhs, 0)) == AND)
	    return 0;

	  *special = SH_NOSWAP_LD;
	  return 1;
	}
      else
	return 0;
    }

  if (insn_entry[i].is_store)
    {
      if (GET_CODE (body) == SET
	  && GET_CODE (SET_SRC (body)) != UNSPEC
	  && GET_CODE (SET_SRC (body)) != VEC_SELECT)
	{
	  rtx lhs = SET_DEST (body);
	  /* Even without a swap, the RHS might be a vec_select for, say,
	     a byte-reversing store.  */
	  if (!MEM_P (lhs))
	    return 0;
	  if (GET_CODE (XEXP (lhs, 0)) == AND)
	    return 0;

	  *special = SH_NOSWAP_ST;
	  return 1;
	}
      else
	return 0;
    }

  /* A convert to single precision can be left as is provided that
     all of its uses are in xxspltw instructions that splat BE element
     zero.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link)
	    return 0;

	  for (; link; link = link->next) {
	    rtx use_insn = DF_REF_INSN (link->ref);
	    rtx use_body = PATTERN (use_insn);
	    if (GET_CODE (use_body) != SET
		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
		|| XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
	      return 0;
	  }
	}

      return 1;
    }

  /* A concatenation of two doublewords is ok if we reverse the
     order of the inputs.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
      && (GET_MODE (SET_SRC (body)) == V2DFmode
	  || GET_MODE (SET_SRC (body)) == V2DImode))
    {
      *special = SH_CONCAT;
      return 1;
    }

  /* V2DF reductions are always swappable.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx expr = XVECEXP (body, 0, 0);
      if (GET_CODE (expr) == SET
	  && v2df_reduction_p (SET_SRC (expr)))
	return 1;
    }

  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
     constant pool.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
      && XVECLEN (SET_SRC (body), 0) == 3
      && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
    {
      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
	  {
	    struct df_link *def_link = DF_REF_CHAIN (use);
	    /* Punt if multiple definitions for this reg.  */
	    if (def_link && !def_link->next
		&& const_load_sequence_p (insn_entry,
					  DF_REF_INSN (def_link->ref)))
	      {
		*special = SH_VPERM;
		return 1;
	      }
	  }
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}

enum chain_purpose { FOR_LOADS, FOR_STORES };

/* Return true if the UD or DU chain headed by LINK is non-empty,
   and every entry on the chain references an insn that is a
   register swap.
   Furthermore, if PURPOSE is FOR_LOADS, each such
   register swap must have only permuting loads as reaching defs.
   If PURPOSE is FOR_STORES, each such register swap must have only
   register swaps or permuting stores as reached uses.  */
static bool
chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
			   enum chain_purpose purpose)
{
  if (!link)
    return false;

  for (; link; link = link->next)
    {
      if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
	continue;

      if (DF_REF_IS_ARTIFICIAL (link->ref))
	return false;

      rtx reached_insn = DF_REF_INSN (link->ref);
      unsigned uid = INSN_UID (reached_insn);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);

      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
	  || insn_entry[uid].is_store)
	return false;

      if (purpose == FOR_LOADS)
	{
	  df_ref use;
	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *swap_link = DF_REF_CHAIN (use);

	      while (swap_link)
		{
		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
		    return false;

		  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
		  unsigned uid2 = INSN_UID (swap_def_insn);

		  /* Only permuting loads are allowed.  */
		  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
		    return false;

		  swap_link = swap_link->next;
		}
	    }
	}
      else if (purpose == FOR_STORES)
	{
	  df_ref def;
	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *swap_link = DF_REF_CHAIN (def);

	      while (swap_link)
		{
		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
		    return false;

		  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
		  unsigned uid2 = INSN_UID (swap_use_insn);

		  /* Permuting stores or register swaps are allowed.  */
		  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
		    return false;

		  swap_link = swap_link->next;
		}
	    }
	}
    }

  return true;
}

/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */
static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);

	  /* We know by now that these are swaps, so we can delete
	     them confidently.  */
	  while (link)
	    {
	      rtx use_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
	      link = link->next;
	    }
	}
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	{
	  /* Ignore uses for addressability.  */
	  machine_mode mode = GET_MODE (DF_REF_REG (use));
	  if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (use);

	  /* We know by now that these are swaps, so we can delete
	     them confidently.  */
	  while (link)
	    {
	      rtx def_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
	      link = link->next;
	    }
	}
    }
}

/* *OP_PTR is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx *op_ptr)
{
  int i;
  rtx op = *op_ptr;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      int units = GET_MODE_NUNITS (GET_MODE (op));
      rtx_vector_builder builder (GET_MODE (op), units, 1);
      for (i = 0; i < units / 2; ++i)
	builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
      for (i = 0; i < units / 2; ++i)
	builder.quick_push (CONST_VECTOR_ELT (op, i));
      *op_ptr = builder.build ();
    }
  else
    {
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
	if (fmt[i] == 'e' || fmt[i] == 'u')
	  swap_const_vector_halves (&XEXP (op, i));
	else if (fmt[i] == 'E')
	  for (j = 0; j < XVECLEN (op, i); ++j)
	    swap_const_vector_halves (&XVECEXP (op, i, j));
    }
}

/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */
static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int index = SUBREG_BYTE (op);
      if (index < 8)
	index += 8;
      else
	index -= 8;
      SUBREG_BYTE (op) = index;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	adjust_subreg_index (XVECEXP (op, i, j));
}

/* Convert the non-permuting load INSN to a permuting one.  */
static void
permute_load (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
	     INSN_UID (insn));
}
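/* For illustration (not from dump output): for a V4SI load, the
   conversion above turns

     (set (reg:V4SI x) (mem:V4SI addr))

   into

     (set (reg:V4SI x) (vec_select:V4SI (mem:V4SI addr)
                                        (parallel [(const_int 2)
                                                   (const_int 3)
                                                   (const_int 0)
                                                   (const_int 1)])))

   i.e. the vanilla load becomes a permuting load whose result matches
   the doubleword-swapped values the rest of the web now expects.  */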
/* Convert the non-permuting store INSN to a permuting one.  */
static void
permute_store (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
	     INSN_UID (insn));
}

/* Given OP that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */
static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
  int lane = INTVAL (XVECEXP (par, 0, 0));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (par, 0, 0) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
}

/* Given OP that contains a vector direct-splat operation, adjust the index
   of the source lane to account for the doubleword swap.  */
static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}

/* Given OP that contains an XXPERMDI operation (that is not a doubleword
   swap), reverse the order of the source operands and adjust the indices
   of the source lanes to account for doubleword reversal.  */
static void
adjust_xxpermdi (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx select = XEXP (set, 1);
  rtx concat = XEXP (select, 0);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  rtx parallel = XEXP (select, 1);
  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
  int new_lane0 = 3 - lane1;
  int new_lane1 = 3 - lane0;
  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
}

/* Given OP that contains a VEC_CONCAT operation of two doublewords,
   reverse the order of those inputs.  */
static void
adjust_concat (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx concat = XEXP (set, 1);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
}

/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
	struct df_link *def_link = DF_REF_CHAIN (use);
	gcc_assert (def_link && !def_link->next);
	swap_insn = DF_REF_INSN (def_link->ref);
	break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  const_rtx tocrel_base;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (MEM_P (tocrel_expr))
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (SYMBOL_REF_P (const_vector))
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
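  /* (Worked example, for illustration only: selector 0 becomes 8,
     7 becomes 15, and 8 becomes 0; selectors referring to the second
     input are shifted the same way within [16,31], so 16 becomes 24
     and 31 becomes 23.)  */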
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i) {
    val = INTVAL (XVECEXP (const_vector, 0, i));
    if (val < 16)
      new_mask[i] = (val + 8) % 16;
    else
      new_mask[i] = ((val + 8) % 16) + 16;
  }

  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0))) {
    rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
    XEXP (new_mem, 0) = base_reg;
    /* Move the newly created insn ahead of the load insn.  */
    rtx_insn *force_insn = get_last_insn ();
    remove_insn (force_insn);
    rtx_insn *before_load_insn = PREV_INSN (load_insn);
    add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
    df_insn_rescan (before_load_insn);
    df_insn_rescan (force_insn);
  }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}

/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
	gcc_assert (GET_CODE (body) == SET);
	swap_const_vector_halves (&SET_SRC (body));
	if (dump_file)
	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
	break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
	 select a smaller portion of a reg, adjust the index for
	 swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}
/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}

/* INSN is known to contain a SUBREG, which we can normally handle,
   but if the SUBREG itself contains a MULT then we need to leave it alone
   to avoid turning a mult_hipart into a mult_lopart, for example.  */
static bool
has_part_mult (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return false;
  rtx src = SET_SRC (body);
  if (GET_CODE (src) != SUBREG)
    return false;
  rtx inner = XEXP (src, 0);
  return (GET_CODE (inner) == MULT);
}
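/* (For illustration: a highpart multiply is typically expressed as a
   narrowing subreg of a widening MULT, e.g.

     (set (reg:DI d) (subreg:DI (mult:TI (...) (...)) 8))

   on little endian.  Letting adjust_subreg_index flip that byte
   offset would quietly turn the highpart result into the lowpart,
   hence the check above.)  */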
/* Make NEW_MEM_EXP's attributes and flags resemble those of
   ORIGINAL_MEM_EXP.  */
static void
mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
{
  RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
  RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
  RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
  RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
  RTX_FLAG (new_mem_exp, frame_related) =
    RTX_FLAG (original_mem_exp, frame_related);

  /* The following fields may not be used with MEM subexpressions.  */
  RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
  RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);

  struct mem_attrs original_attrs = *get_mem_attrs (original_mem_exp);

  alias_set_type set = original_attrs.alias;
  set_mem_alias_set (new_mem_exp, set);

  addr_space_t addrspace = original_attrs.addrspace;
  set_mem_addr_space (new_mem_exp, addrspace);

  unsigned int align = original_attrs.align;
  set_mem_align (new_mem_exp, align);

  tree expr = original_attrs.expr;
  set_mem_expr (new_mem_exp, expr);

  if (original_attrs.offset_known_p)
    {
      HOST_WIDE_INT offset = original_attrs.offset;
      set_mem_offset (new_mem_exp, offset);
    }
  else
    clear_mem_offset (new_mem_exp);

  if (original_attrs.size_known_p)
    {
      HOST_WIDE_INT size = original_attrs.size;
      set_mem_size (new_mem_exp, size);
    }
  else
    clear_mem_size (new_mem_exp);
}

/* Generate an rtx expression to represent use of the stvx insn to store
   the value represented by register SRC_EXP into the memory at address
   DEST_EXP, with vector mode MODE.  */
rtx
rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx stvx;

  if (mode == V16QImode)
    stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
  else if (mode == V8HImode)
    stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
#endif
  else if (mode == V4SImode)
    stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
  else if (mode == V4SFmode)
    stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
  else if (mode == V2DImode)
    stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
  else if (mode == V2DFmode)
    stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
  else if (mode == V1TImode)
    stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_DEST (PATTERN (stvx));
  mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
  return stvx;
}

/* Given that STORE_INSN represents an aligned store-with-swap of a
   swapped value, replace the store with an aligned store (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_store (swap_web_entry *insn_entry,
			       rtx_insn *store_insn)
{
  unsigned uid = INSN_UID (store_insn);
  gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);

  rtx body = PATTERN (store_insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);
  gcc_assert (REG_P (dest_address)
	      || rs6000_sum_of_two_registers_p (dest_address));

  /* Find the swap instruction that provides the value to be stored by
     this store-with-swap instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
  df_ref use;
  rtx_insn *swap_insn = NULL;
  unsigned uid2 = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
	 then skip it.  I am only interested in the swap insn.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
	continue;

      /* If there is no def or the def is artificial or there are
	 multiple defs, we should not be here.  */
      gcc_assert (def_link && def_link->ref && !def_link->next
		  && !DF_REF_IS_ARTIFICIAL (def_link->ref));

      swap_insn = DF_REF_INSN (def_link->ref);
      uid2 = INSN_UID (swap_insn);

      /* If this source value is not a simple swap, we should not be here.  */
      gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
		  && !insn_entry[uid2].is_store);

      /* We've processed the use we care about, so break out of
	 this loop.  */
      break;
    }

  /* At this point, swap_insn and uid2 represent the swap instruction
     that feeds the store.  */
  gcc_assert (swap_insn);
  rtx set = single_set (store_insn);
  gcc_assert (set);
  rtx dest_exp = SET_DEST (set);
  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (dest_exp);
  gcc_assert (MEM_P (dest_exp));
  gcc_assert (MEM_ALIGN (dest_exp) >= 128);

  /* Replace the store with an stvx insn.  */
  rtx stvx;
  stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);

  rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
	      && MEM_P (SET_DEST (new_body)));

  basic_block bb = BLOCK_FOR_INSN (store_insn);
  set_block_for_insn (new_insn, bb);
  /* Handle REG_EH_REGION note.  */
  if (cfun->can_throw_non_call_exceptions && BB_END (bb) == store_insn)
    {
      rtx note = find_reg_note (store_insn, REG_EH_REGION, NULL_RTX);
      if (note)
	add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
    }
  df_insn_rescan (new_insn);

  df_insn_delete (store_insn);
  remove_insn (store_insn);
  store_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  uid2 = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid2);
  replace_swap_with_copy (insn_entry, uid2);
}

/* Generate an rtx expression to represent use of the lvx insn to load
   from memory SRC_EXP into register DEST_EXP with vector mode MODE.  */
rtx
rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx lvx;

  if (mode == V16QImode)
    lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
  else if (mode == V8HImode)
    lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
#endif
  else if (mode == V4SImode)
    lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
  else if (mode == V4SFmode)
    lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
  else if (mode == V2DImode)
    lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
  else if (mode == V2DFmode)
    lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
  else if (mode == V1TImode)
    lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_SRC (PATTERN (lvx));
  mimic_memory_attributes_and_flags (new_mem_exp, src_exp);

  return lvx;
}
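/* (Why alignment lets us do this: lvx and stvx ignore the low-order
   four bits of the effective address and access vector elements in
   their natural order, so for a known 16-byte-aligned address they
   touch exactly the bytes an lxvd2x/stxvd2x-plus-xxswapdi sequence
   would, with no doubleword swap required.)  */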
/* Given that SWAP_INSN represents a swap of an aligned
   load-with-swap, replace the load with an aligned load (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  unsigned uid = INSN_UID (swap_insn);
  /* Only call this if quad_aligned_load_p (swap_insn).  */
  gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  If there is no
     def, or the def is artificial, or there are multiple defs, we
     should not be here.  */
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && def_link->ref
	      && !DF_REF_IS_ARTIFICIAL (def_link->ref)
	      && !def_link->next);

  rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);

  /* We're expecting a load-with-swap insn.  */
  gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);

  /* We expect this to be a set from memory, with the source
     representing a swap (indicated by code VEC_SELECT).  */
  rtx body = PATTERN (def_insn);
  gcc_assert ((GET_CODE (body) == SET)
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
		  || pattern_is_rotate64 (body))
	      && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (src_exp);
  rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);

  rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
	      && MEM_P (SET_SRC (new_body)));

  basic_block bb = BLOCK_FOR_INSN (def_insn);
  set_block_for_insn (new_insn, bb);
  /* Handle REG_EH_REGION note.  */
  if (cfun->can_throw_non_call_exceptions && BB_END (bb) == def_insn)
    {
      rtx note = find_reg_note (def_insn, REG_EH_REGION, NULL_RTX);
      if (note)
	add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
    }
  df_insn_rescan (new_insn);

  df_insn_delete (def_insn);
  remove_insn (def_insn);
  def_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}

/* Given that SWAP_INSN represents a swap of a load of a constant
   vector value, replace with a single instruction that loads a
   swapped variant of the original constant.

   The "natural" representation of a byte array in memory is the same
   for big endian and little endian.

     unsigned char byte_array[] =
       { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };

   However, when loaded into a vector register, the representation
   depends on endian conventions.

   In big-endian mode, the register holds:

     MSB                                            LSB
     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     MSB                                            LSB
     [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]

   Word arrays require different handling.  Consider the word array:

     unsigned int word_array[] =
       { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };

   The in-memory representation depends on endian configuration.  The
   equivalent array, declared as a byte array, in memory would be:

     unsigned char big_endian_word_array_data[] =
       { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }

     unsigned char little_endian_word_array_data[] =
       { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }

   In big-endian mode, the register holds:

     MSB                                            LSB
     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     MSB                                            LSB
     [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]

   Similar transformations apply to the vector of half-word and vector
   of double-word representations.

   For now, don't handle vectors of quad-precision values.  Just return.
   A better solution is to fix the code generator to emit lvx/stvx for
   those.  */
static void
replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn;
  df_ref use = DF_INSN_INFO_USES (insn_info);
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  load_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  use = DF_INSN_INFO_USES (insn_info);

  def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  const_rtx tocrel_base;

  /* There is an extra level of indirection for small/large code models.  */
  if (MEM_P (tocrel_expr))
    tocrel_expr = XEXP (tocrel_expr, 0);

  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();

  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);

  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (SYMBOL_REF_P (const_vector))
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  rtx new_mem;
  enum machine_mode mode = GET_MODE (const_vector);
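  /* Worked example (V4SI): a pool constant { 0, 1, 2, 3 } has its
     doublewords exchanged, giving { 2, 3, 0, 1 }.  Loading the new
     constant with a plain (non-permuting) load leaves the register
     holding exactly what the original load-then-swap sequence
     produced.  In general, element i of the original constant lands
     in slot (i + n/2) % n of the new one, where n is the number of
     elements, as the loops below implement.  */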
  /* Create an adjusted constant from the original constant.  */
  if (mode == V1TImode)
    /* Leave this code as is.  */
    return;
  else if (mode == V16QImode)
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
      int i;

      for (i = 0; i < 16; i++)
	XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V8HImode)
#ifdef HAVE_V8HFmode
	   || (mode == V8HFmode)
#endif
	   )
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
      int i;

      for (i = 0; i < 8; i++)
	XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V4SImode) || (mode == V4SFmode))
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
      int i;

      for (i = 0; i < 4; i++)
	XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V2DImode) || (mode == V2DFmode))
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
      int i;

      for (i = 0; i < 2; i++)
	XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else
    {
      /* We do not expect other modes to be constant-load-swapped.  */
      gcc_unreachable ();
    }
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;

      /* Move the newly created insn ahead of the load insn.  The last
	 insn is the insn that forced new_mem into a register.  */
      rtx_insn *force_insn = get_last_insn ();
      /* Remove this insn from the end of the instruction sequence.  */
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);

      /* And insert this insn back into the sequence before the previous
	 load insn so this new expression will be available when the
	 existing load is modified to load the swapped constant.  */
      add_insn_after (force_insn, before_load_insn,
		      BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  unsigned int uid = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}

/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
	swap_web_entry *pred_entry = (swap_web_entry *) insn_entry[i].pred ();
	fprintf (dump_file, "%6d %6d ", i,
		 pred_entry && pred_entry->insn
		 ? INSN_UID (pred_entry->insn) : 0);
	if (insn_entry[i].is_load)
	  fputs ("load ", dump_file);
	if (insn_entry[i].is_store)
	  fputs ("store ", dump_file);
	if (insn_entry[i].is_swap)
	  fputs ("swap ", dump_file);
	if (insn_entry[i].is_live_in)
	  fputs ("live-in ", dump_file);
	if (insn_entry[i].is_live_out)
	  fputs ("live-out ", dump_file);
	if (insn_entry[i].contains_subreg)
	  fputs ("subreg ", dump_file);
	if (insn_entry[i].is_128_int)
	  fputs ("int128 ", dump_file);
	if (insn_entry[i].is_call)
	  fputs ("call ", dump_file);
	if (insn_entry[i].is_swappable)
	  {
	    fputs ("swappable ", dump_file);
	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
	      fputs ("special:constvec ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SUBREG)
	      fputs ("special:subreg ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
	      fputs ("special:load ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
	      fputs ("special:store ", dump_file);
	    else if (insn_entry[i].special_handling == SH_EXTRACT)
	      fputs ("special:extract ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SPLAT)
	      fputs ("special:splat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
	      fputs ("special:xxpermdi ", dump_file);
	    else if (insn_entry[i].special_handling == SH_CONCAT)
	      fputs ("special:concat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_VPERM)
	      fputs ("special:vperm ", dump_file);
	  }
	if (insn_entry[i].web_not_optimizable)
	  fputs ("unoptimizable ", dump_file);
	if (insn_entry[i].will_delete)
	  fputs ("delete ", dump_file);
	fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}
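/* The helper functions that follow support the second use of this
   pass: recognizing addresses that are known to be 16-byte aligned
   because they were computed by ANDing with -16, so that the affected
   loads and stores can be rewritten to use lvx/stvx with no
   accompanying swap.  */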
/* Return RTX with its address canonicalized to (reg) or (+ reg reg).
   Here RTX is an (& addr (const_int -16)).  Always return a new copy
   to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
	addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
	addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}

/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (CONST_INT_P (mask))
    {
      if (INTVAL (mask) == -16)
	return alignment_with_canonical_addr (SET_SRC (body));
      else
	return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
	continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
	return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
	return 0;

      real_mask = SET_SRC (const_body);

      if (!CONST_INT_P (real_mask)
	  || INTVAL (real_mask) != -16)
	return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
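/* For example, alignment_mask accepts an insn such as

     (set (reg:DI A') (and:DI (reg:DI A) (const_int -16)))

   since ANDing an address with -16 clears its low four bits, leaving
   a 16-byte-aligned address.  As the code above shows, the mask may
   also be held in a register whose unique definition is the constant
   -16.  */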
/* Given INSN that's a load or store based at BASE_REG, check if
   all of its feeding computations align its address on a 16-byte
   boundary.  If so, return true and add all definition insns into
   AND_INSNS and their corresponding fully-expanded rtxes for the
   masking operations into AND_OPS.  */

static bool
find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
		   vec<rtx> *and_ops)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link)
	return false;

      while (base_def_link)
	{
	  /* With stack-protector code enabled, and possibly in other
	     circumstances, there may not be an associated insn for
	     the def.  */
	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	    return false;

	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
	  and_operation = alignment_mask (and_insn);

	  /* Stop as soon as we find a def that doesn't align the
	     address.  */
	  if (!and_operation)
	    return false;

	  and_insns->safe_push (and_insn);
	  and_ops->safe_push (and_operation);
	  base_def_link = base_def_link->next;
	}
    }

  return and_operation != 0;
}

struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
		  || pattern_is_rotate64 (body))
	      && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  auto_vec<rtx_insn *> and_insns;
  auto_vec<rtx> and_ops;
  bool is_any_def_and
    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);

  if (is_any_def_and)
    {
      gcc_assert (and_insns.length () == and_ops.length ());
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected lvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     load.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  rtx new_reg = 0;
	  rtx and_mask = 0;
	  for (unsigned i = 0; i < and_insns.length (); i++)
	    {
	      /* However, first we must be sure that we make the
		 base register from the AND operation available
		 in case the register has been overwritten.  Copy
		 the base register to a new pseudo and use that
		 as the base register of the AND operation in
		 the new LVX instruction.  */
	      rtx_insn *and_insn = and_insns[i];
	      rtx and_op = and_ops[i];
	      rtx and_base = XEXP (and_op, 0);
	      if (!new_reg)
		{
		  new_reg = gen_reg_rtx (GET_MODE (and_base));
		  and_mask = XEXP (and_op, 1);
		}
	      rtx copy = gen_rtx_SET (new_reg, and_base);
	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	      df_insn_rescan (new_insn);
	    }

	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
	  SET_SRC (body) = mem;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "lvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
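/* recombine_stvx_pattern below is the mirror image of the lvx case:
   instead of following the load's definition chain to find the
   dependent swap, it follows the use chain of the stored source
   register back to the swap that feeds the store.  */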
/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && MEM_P (SET_DEST (body))
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
		  || pattern_is_rotate64 (body)));
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  auto_vec<rtx_insn *> and_insns;
  auto_vec<rtx> and_ops;
  bool is_any_def_and
    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);

  if (is_any_def_and)
    {
      gcc_assert (and_insns.length () == and_ops.length ());
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (src_use);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected stvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     store.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  rtx new_reg = 0;
	  rtx and_mask = 0;
	  for (unsigned i = 0; i < and_insns.length (); i++)
	    {
	      /* However, first we must be sure that we make the
		 base register from the AND operation available
		 in case the register has been overwritten.  Copy
		 the base register to a new pseudo and use that
		 as the base register of the AND operation in
		 the new STVX instruction.  */
	      rtx_insn *and_insn = and_insns[i];
	      rtx and_op = and_ops[i];
	      rtx and_base = XEXP (and_op, 0);
	      if (!new_reg)
		{
		  new_reg = gen_reg_rtx (GET_MODE (and_base));
		  and_mask = XEXP (and_op, 1);
		}
	      rtx copy = gen_rtx_SET (new_reg, and_base);
	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	      df_insn_rescan (new_insn);
	    }

	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
	  SET_SRC (body) = src_reg;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "stvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
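/* After canonicalization, the load or store addresses memory through
   an explicit (and (reg) (const_int -16)) operand, which is the form
   the lvx/stvx insn patterns expect for an aligned access.  */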
/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
	if (!NONDEBUG_INSN_P (insn))
	  continue;

	if (insn_is_load_p (insn) && insn_is_swap_p (insn))
	  recombine_lvx_pattern (insn, to_delete);
	else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
	  recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
	rtx swap_body = PATTERN (to_delete[i].replace_insn);
	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
	rtx_insn *new_insn = emit_insn_before (copy,
					       to_delete[i].replace_insn);
	set_block_for_insn (new_insn,
			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
	df_insn_rescan (new_insn);
	df_insn_delete (to_delete[i].replace_insn);
	remove_insn (to_delete[i].replace_insn);
	to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Rebuild ud- and du-chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    insn_entry[uid].insn = insn;

	    if (GET_CODE (insn) == CALL_INSN)
	      insn_entry[uid].is_call = 1;

	    /* Walk the uses and defs to see if we mention vector regs.
	       Record any constraints on optimization of such mentions.  */
	    struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	    df_ref mention;
	    FOR_EACH_INSN_INFO_USE (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If a use gets its value from a call insn, it will be
		   a hard register and will look like (reg:V4SI 3 3).
		   The df analysis creates two mentions for GPR3 and GPR4,
		   both DImode.  We must recognize this and treat it as a
		   vector mention to ensure the call is unioned with this
		   use.  */
		if (mode == DImode && DF_REF_INSN_INFO (mention))
		  {
		    rtx feeder = DF_REF_INSN (mention);
		    /* FIXME: It is pretty hard to get from the df mention
		       to the mode of the use in the insn.  We arbitrarily
		       pick a vector mode here, even though the use might
		       be a real DImode.  We can be too conservative
		       (create a web larger than necessary) because of
		       this, so consider eventually fixing this.  */
		    if (GET_CODE (feeder) == CALL_INSN)
		      mode = V4SImode;
		  }

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    union_defs (insn_entry, insn, mention);
		  }
	      }
	    FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If we're loading up a hard vector register for a call,
		   it looks like (set (reg:V4SI 9 9) (...)).  The df
		   analysis creates two mentions for GPR9 and GPR10, both
		   DImode.  So relying on the mode from the mentions
		   isn't sufficient to ensure we union the call into the
		   web with the parameter setup code.  */
		rtx pat = PATTERN (insn);
		if (mode == DImode && GET_CODE (pat) == SET
		    && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (pat))))
		  mode = GET_MODE (SET_DEST (pat));

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		      insn_entry[uid].is_live_out = 1;
		    union_uses (insn_entry, insn, mention);
		  }
	      }

	    if (insn_entry[uid].is_relevant)
	      {
		/* Determine if this is a load or store.  */
		insn_entry[uid].is_load = insn_is_load_p (insn);
		insn_entry[uid].is_store = insn_is_store_p (insn);

		/* Determine if this is a doubleword swap.  If not,
		   determine whether it can legally be swapped.  */
		if (insn_is_swap_p (insn))
		  insn_entry[uid].is_swap = 1;
		else
		  {
		    unsigned int special = SH_NONE;
		    insn_entry[uid].is_swappable
		      = insn_is_swappable_p (insn_entry, insn, &special);
		    if (special != SH_NONE && insn_entry[uid].contains_subreg)
		      insn_entry[uid].is_swappable = 0;
		    else if (special != SH_NONE)
		      insn_entry[uid].special_handling = special;
		    else if (insn_entry[uid].contains_subreg
			     && has_part_mult (insn))
		      insn_entry[uid].is_swappable = 0;
		    else if (insn_entry[uid].contains_subreg)
		      insn_entry[uid].special_handling = SH_SUBREG;
		  }
	      }
	  }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }
  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have a swap that is both fed by a permuting load
	 and a feeder of a permuting store, then the optimization
	 isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
      else if (insn_entry[i].is_swap && !insn_entry[i].is_load
	       && !insn_entry[i].is_store
	       && swap_feeds_both_load_and_store (&insn_entry[i]))
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);
  /* Clean up.  */
  free (insn_entry);

  /* Use a second pass over rtl to detect that certain vector values
     fetched from or stored to memory on quad-word aligned addresses
     can use lvx/stvx without swaps.  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass2_insn_entry;
  pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    pass2_insn_entry[uid].insn = insn;

	    pass2_insn_entry[uid].is_relevant = 1;
	    pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
	    pass2_insn_entry[uid].is_store = insn_is_store_p (insn);

	    /* Determine if this is a doubleword swap.  */
	    if (insn_is_swap_p (insn))
	      pass2_insn_entry[uid].is_swap = 1;
	  }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
	&& !pass2_insn_entry[i].is_store)
      {
	/* Replace swap of aligned load-swap with aligned unswapped
	   load.  */
	insn = pass2_insn_entry[i].insn;
	if (quad_aligned_load_p (pass2_insn_entry, insn))
	  replace_swapped_aligned_load (pass2_insn_entry, insn);
      }
    else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
      {
	/* Replace aligned store-swap of swapped value with aligned
	   unswapped store.  */
	insn = pass2_insn_entry[i].insn;
	if (quad_aligned_store_p (pass2_insn_entry, insn))
	  replace_swapped_aligned_store (pass2_insn_entry, insn);
      }

  /* Clean up.  */
  free (pass2_insn_entry);

  /* Use a third pass over rtl to replace swap(load(vector constant))
     with load(swapped vector constant).  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass3_insn_entry;
  pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    pass3_insn_entry[uid].insn = insn;

	    pass3_insn_entry[uid].is_relevant = 1;
	    pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
	    pass3_insn_entry[uid].is_store = insn_is_store_p (insn);

	    /* Determine if this is a doubleword swap.  */
	    if (insn_is_swap_p (insn))
	      pass3_insn_entry[uid].is_swap = 1;
	  }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
	&& !pass3_insn_entry[i].is_store)
      {
	insn = pass3_insn_entry[i].insn;
	if (const_load_sequence_p (pass3_insn_entry, insn))
	  replace_swapped_load_constant (pass3_insn_entry, insn);
      }
  /* Clean up.  */
  free (pass3_insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	    && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }

  opt_pass *clone ()
  {
    return new pass_analyze_swaps (m_ctxt);
  }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}