1/* Tag Collision Avoidance pass for Falkor. 2 Copyright (C) 2018-2020 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 GCC is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20#define IN_TARGET_CODE 1 21 22#include "config.h" 23#define INCLUDE_LIST 24#include "system.h" 25#include "coretypes.h" 26#include "backend.h" 27#include "target.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tree-pass.h" 31#include "aarch64-protos.h" 32#include "hash-map.h" 33#include "cfgloop.h" 34#include "cfgrtl.h" 35#include "rtl-iter.h" 36#include "df.h" 37#include "memmodel.h" 38#include "optabs.h" 39#include "regs.h" 40#include "recog.h" 41#include "function-abi.h" 42#include "regrename.h" 43#include "print-rtl.h" 44 45/* The Falkor hardware prefetching system uses the encoding of the registers 46 and offsets of loads to decide which of the multiple hardware prefetchers to 47 assign the load to. This has the positive effect of accelerating prefetches 48 when all related loads with uniform strides are assigned to the same 49 prefetcher unit. The down side is that because of the way the assignment 50 works, multiple unrelated loads may end up on the same prefetch unit, thus 51 causing the unit to bounce between different sets of addresses and never 52 train correctly. The point of this pass is to avoid such collisions so that 53 unrelated loads are spread out to different prefetchers. It also makes a 54 rudimentary attempt to ensure that related loads with the same tags don't 55 get moved out unnecessarily. 56 57 Perhaps a future enhancement would be to make a more concerted attempt to 58 get related loads under the same tag. See the memcpy/memset implementation 59 for falkor in glibc to understand the kind of impact this can have on 60 falkor. 61 62 The assignment of loads is based on a tag that is computed from the encoding 63 of the first destination register (the only destination in case of LDR), the 64 base register and the offset (either the register or the immediate value, as 65 encoded in the instruction). This is what the 14 bit tag looks like: 66 67 |<- 6 bits ->|<- 4b ->|<- 4b ->| 68 -------------------------------- 69 | OFFSET | SRC | DST | 70 -------------------------------- 71 72 For all cases, the SRC and DST are the 4 LSB of the encoding of the register 73 in the instruction. Offset computation is more involved and is as follows: 74 75 - For register offset addressing: 4 LSB of the offset register with the MSB 76 of the 6 bits set to 1. 77 78 - For immediate offset: 4 LSB of the encoded immediate offset. The encoding 79 depends on the width of the load and is expressed as multiples of the 80 width. 81 82 - For loads with update: 4 LSB of the offset. The encoding here is the 83 exact number by which the base is offset and incremented. 84 85 Based on the above it is clear that registers 0 and 16 will result in 86 collisions, 1 and 17 and so on. This pass detects such collisions within a 87 def/use chain of the source register in a loop and tries to resolve the 88 collision by renaming one of the destination registers. */ 89 90/* Get the destination part of the tag. */ 91#define TAG_GET_DEST(__tag) ((__tag) & 0xf) 92 93/* Get the tag with the destination part updated. */ 94#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | (__dest & 0xf)) 95 96#define MAX_PREFETCH_STRIDE 2048 97 98/* The instruction information structure. This is used to cache information 99 about the INSN that we derive when traversing through all of the insns in 100 loops. */ 101class tag_insn_info 102{ 103public: 104 rtx_insn *insn; 105 rtx dest; 106 rtx base; 107 rtx offset; 108 bool writeback; 109 bool ldp; 110 111 tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p) 112 : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p) 113 {} 114 115 /* Compute the tag based on BASE, DEST and OFFSET of the load. */ 116 unsigned tag () 117 { 118 unsigned int_offset = 0; 119 rtx offset = this->offset; 120 unsigned dest = REGNO (this->dest); 121 unsigned base = REGNO (this->base); 122 machine_mode dest_mode = GET_MODE (this->dest); 123 124 /* Falkor does not support SVE; GET_LOAD_INFO ensures that the 125 destination mode is constant here. */ 126 unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant (); 127 128 /* For loads of larger than 16 bytes, the DEST part of the tag is 0. */ 129 if ((dest_mode_size << this->ldp) > 16) 130 dest = 0; 131 132 if (offset && REG_P (offset)) 133 int_offset = (1 << 5) | REGNO (offset); 134 else if (offset && CONST_INT_P (offset)) 135 { 136 int_offset = INTVAL (offset); 137 int_offset /= dest_mode_size; 138 if (!this->writeback) 139 int_offset >>= 2; 140 } 141 return ((dest & 0xf) 142 | ((base & 0xf) << 4) 143 | ((int_offset & 0x3f) << 8)); 144 } 145}; 146 147/* Hash map to traverse and process instructions with colliding tags. */ 148typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t; 149 150/* Vector of instructions with colliding tags. */ 151typedef auto_vec <tag_insn_info *> insn_info_list_t; 152 153/* Pair of instruction information and unavailable register set to pass to 154 CHECK_COLLIDING_TAGS. */ 155typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t; 156 157 158/* Callback to free all tag_insn_info objects. */ 159bool 160free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v, 161 void *arg ATTRIBUTE_UNUSED) 162{ 163 while (v->length () > 0) 164 delete v->pop (); 165 166 return true; 167} 168 169 170/* Add all aliases of the register to the unavailable register set. REG is the 171 smallest register number that can then be used to reference its aliases. 172 UNAVAILABLE is the hard register set to add the ignored register numbers to 173 and MODE is the mode in which the registers would have been used. */ 174static void 175ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg) 176{ 177 add_to_hard_reg_set (unavailable, mode, reg); 178 add_to_hard_reg_set (unavailable, mode, reg + 16); 179 add_to_hard_reg_set (unavailable, mode, reg + 32); 180 add_to_hard_reg_set (unavailable, mode, reg + 48); 181} 182 183 184/* Callback to check which destination registers are unavailable to us for 185 renaming because of the base and offset colliding. This is a callback that 186 gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an 187 std::pair of the tag_insn_info of the original insn and the hard register 188 set UNAVAILABLE that is used to record hard register numbers that cannot be 189 used for the renaming. This always returns true since we want to traverse 190 through the entire TAG_MAP. */ 191bool 192check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg) 193{ 194 HARD_REG_SET *unavailable = arg->second; 195 unsigned orig_tag = arg->first->tag (); 196 unsigned tag = INTVAL (t); 197 machine_mode mode = GET_MODE (arg->first->dest); 198 199 /* Can't collide with emptiness. */ 200 if (v.length () == 0) 201 return true; 202 203 /* Drop all aliased destination registers that result in the same 204 tag. It is not necessary to drop all of them but we do anyway 205 because it is quicker than checking ranges. */ 206 if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0)) 207 ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag)); 208 209 return true; 210} 211 212 213/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for 214 renaming. INSN_INFO is the original insn, TAG_MAP is the map of the list of 215 insns indexed by their tags, HEAD is the def/use chain head of the 216 destination register of the original insn. The routine returns the super 217 class of register classes that may be used during the renaming. */ 218static enum reg_class 219init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head, 220 HARD_REG_SET *unavailable) 221{ 222 unsigned dest = head->regno; 223 enum reg_class super_class = NO_REGS; 224 machine_mode mode = GET_MODE (insn_info->dest); 225 226 CLEAR_HARD_REG_SET (*unavailable); 227 228 for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use) 229 { 230 if (DEBUG_INSN_P (tmp->insn)) 231 continue; 232 233 *unavailable |= ~reg_class_contents[tmp->cl]; 234 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; 235 } 236 237 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) 238 if (fixed_regs[i] || global_regs[i]) 239 add_to_hard_reg_set (unavailable, mode, i); 240 241 arg_pair_t arg = arg_pair_t (insn_info, unavailable); 242 243 /* Exclude all registers that would lead to collisions with other loads. */ 244 tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg); 245 246 /* Finally, also ignore all aliases of the current reg. */ 247 ignore_all_aliases (unavailable, mode, dest & 0xf); 248 249 return super_class; 250} 251 252 253/* Find a suitable and available register and rename the chain of occurrences 254 of the register defined in the def/use chain headed by HEAD in which INSN 255 exists. CUR_TAG, TAGS and TAG_MAP are used to determine which registers are 256 unavailable due to a potential collision due to the rename. The routine 257 returns the register number in case of a successful rename or -1 to indicate 258 failure. */ 259static int 260rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head) 261{ 262 unsigned dest_regno = head->regno; 263 264 if (head->cannot_rename || head->renamed) 265 return -1; 266 267 HARD_REG_SET unavailable; 268 269 enum reg_class super_class = init_unavailable (insn_info, tag_map, head, 270 &unavailable); 271 272 unsigned new_regno = find_rename_reg (head, super_class, &unavailable, 273 dest_regno, false); 274 275 /* Attempt to rename as long as regrename doesn't just throw the same 276 register at us. */ 277 if (new_regno != dest_regno && regrename_do_replace (head, new_regno)) 278 { 279 if (dump_file && (dump_flags & TDF_DETAILS)) 280 fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n", 281 INSN_UID (insn_info->insn), dest_regno, new_regno); 282 283 return new_regno; 284 } 285 286 return -1; 287} 288 289 290/* Return true if REGNO is not safe to rename. */ 291static bool 292unsafe_rename_p (unsigned regno) 293{ 294 /* Avoid renaming registers used for argument passing and return value. In 295 future we could be a little less conservative and walk through the basic 296 blocks to see if there are any call or syscall sites. */ 297 if (regno <= R8_REGNUM 298 || (regno >= V0_REGNUM && regno < V8_REGNUM)) 299 return true; 300 301 /* Don't attempt to rename registers that may have specific meanings. */ 302 switch (regno) 303 { 304 case LR_REGNUM: 305 case HARD_FRAME_POINTER_REGNUM: 306 case FRAME_POINTER_REGNUM: 307 case STACK_POINTER_REGNUM: 308 return true; 309 } 310 311 return false; 312} 313 314 315/* Go through the def/use chains for the register and find the chain for this 316 insn to rename. The function returns the hard register number in case of a 317 successful rename and -1 otherwise. */ 318static int 319rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map) 320{ 321 struct du_chain *chain = NULL; 322 du_head_p head = NULL; 323 int i; 324 325 unsigned dest_regno = REGNO (insn_info->dest); 326 327 if (unsafe_rename_p (dest_regno)) 328 return -1; 329 330 /* Search the chain where this instruction is (one of) the root. */ 331 rtx_insn *insn = insn_info->insn; 332 operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info; 333 334 for (i = 0; i < dest_op_info->n_chains; i++) 335 { 336 /* The register tracked by this chain does not match the 337 destination register of insn. */ 338 if (dest_op_info->heads[i]->regno != dest_regno) 339 continue; 340 341 head = dest_op_info->heads[i]; 342 /* The chain was merged in another, find the new head. */ 343 if (!head->first) 344 head = regrename_chain_from_id (head->id); 345 346 for (chain = head->first; chain; chain = chain->next_use) 347 /* Found the insn in the chain, so try renaming the register in this 348 chain. */ 349 if (chain->insn == insn) 350 return rename_chain (insn_info, tag_map, head); 351 } 352 353 return -1; 354} 355 356 357/* Flag to track if the map has changed. */ 358static bool map_changed = false; 359 360/* The actual reallocation logic. For each vector of collisions V, try to 361 resolve the collision by attempting to rename the destination register of 362 all but one of the loads. This is a callback that is invoked for each 363 name-value pair (T, V) in TAG_MAP. The function returns true whenever it 364 returns unchanged and false otherwise to halt traversal. */ 365bool 366avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map) 367{ 368 /* We need at least two loads to cause a tag collision, return unchanged. */ 369 if (v->length () < 2) 370 return true; 371 372 tag_insn_info *vec_start = v->pop (); 373 tag_insn_info *insn_info = vec_start; 374 375 /* Try to rename at least one register to reduce the collision. If we 376 iterate all the way through, we end up dropping one of the loads from the 377 list. This is fine because we want at most one element to ensure that a 378 subsequent rename attempt does not end up worsening the collision. */ 379 do 380 { 381 int new_regno; 382 383 if ((new_regno = rename_dest (insn_info, *tag_map)) != -1) 384 { 385 rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno)); 386 387 tag_map->get_or_insert (new_tag).safe_push (insn_info); 388 df_set_regs_ever_live (new_regno, true); 389 map_changed = true; 390 return false; 391 } 392 393 v->safe_insert (0, insn_info); 394 insn_info = v->pop (); 395 } 396 while (insn_info != vec_start); 397 398 if (dump_file) 399 fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>", 400 INSN_UID (insn_info->insn)); 401 402 /* Drop the last element and move on to the next tag. */ 403 delete insn_info; 404 return true; 405} 406 407 408/* For each set of collisions, attempt to rename the registers or insert a move 409 to avoid the collision. We repeatedly traverse through TAG_MAP using 410 AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a 411 full traversal results in no change in the map. */ 412static void 413avoid_collisions (tag_map_t &tag_map) 414{ 415 do 416 { 417 map_changed = false; 418 tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map); 419 } 420 while (map_changed); 421} 422 423 424 425/* Find the use def chain in which INSN exists and then see if there is a 426 definition inside the loop and outside it. We use this as a simple 427 approximation to determine whether the base register is an IV. The basic 428 idea is to find INSN in the use-def chains for its base register and find 429 all definitions that reach it. Of all these definitions, there should be at 430 least one definition that is a simple addition of a constant value, either 431 as a binary operation or a pre or post update. 432 433 The function returns true if the base register is estimated to be an IV. */ 434static bool 435iv_p (rtx_insn *insn, rtx reg, struct loop *loop) 436{ 437 df_ref ause; 438 unsigned regno = REGNO (reg); 439 440 /* Ignore loads from the stack. */ 441 if (regno == SP_REGNUM) 442 return false; 443 444 for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause)) 445 { 446 if (!DF_REF_INSN_INFO (ause) 447 || !NONDEBUG_INSN_P (DF_REF_INSN (ause))) 448 continue; 449 450 if (insn != DF_REF_INSN (ause)) 451 continue; 452 453 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); 454 df_ref def_rec; 455 456 FOR_EACH_INSN_INFO_DEF (def_rec, insn_info) 457 { 458 rtx_insn *insn = DF_REF_INSN (def_rec); 459 basic_block bb = BLOCK_FOR_INSN (insn); 460 461 if (dominated_by_p (CDI_DOMINATORS, bb, loop->header) 462 && bb->loop_father == loop) 463 { 464 if (recog_memoized (insn) < 0) 465 continue; 466 467 rtx pat = PATTERN (insn); 468 469 /* Prefetch or clobber; unlikely to be a constant stride. The 470 falkor software prefetcher tuning is pretty conservative, so 471 its presence indicates that the access pattern is probably 472 strided but most likely with an unknown stride size or a 473 stride size that is quite large. */ 474 if (GET_CODE (pat) != SET) 475 continue; 476 477 rtx x = SET_SRC (pat); 478 if (GET_CODE (x) == ZERO_EXTRACT 479 || GET_CODE (x) == ZERO_EXTEND 480 || GET_CODE (x) == SIGN_EXTEND) 481 x = XEXP (x, 0); 482 483 /* Loading the value from memory; unlikely to be a constant 484 stride. */ 485 if (MEM_P (x)) 486 continue; 487 488 /* An increment or decrement by a constant MODE_SIZE amount or 489 the result of a binary expression is likely to be an IV. */ 490 if (GET_CODE (x) == POST_INC 491 || GET_CODE (x) == POST_DEC 492 || GET_CODE (x) == PRE_INC 493 || GET_CODE (x) == PRE_DEC) 494 return true; 495 else if (BINARY_P (x) 496 && (CONST_INT_P (XEXP (x, 0)) 497 || CONST_INT_P (XEXP (x, 1)))) 498 { 499 rtx stride = (CONST_INT_P (XEXP (x, 0)) 500 ? XEXP (x, 0) : XEXP (x, 1)); 501 502 /* Don't bother with very long strides because the prefetcher 503 is unable to train on them anyway. */ 504 if (INTVAL (stride) < MAX_PREFETCH_STRIDE) 505 return true; 506 } 507 } 508 } 509 return false; 510 } 511 return false; 512} 513 514 515/* Return true if SRC is a strided load in the LOOP, false otherwise. 516 If it is a strided load, set the BASE and OFFSET. Also, if this is 517 a pre/post increment load, set PRE_POST to true. */ 518static bool 519valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post, 520 rtx *base, rtx *offset, bool load_pair) 521{ 522 subrtx_var_iterator::array_type array; 523 rtx x = NULL_RTX; 524 525 FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST) 526 if (MEM_P (*iter)) 527 { 528 x = *iter; 529 break; 530 } 531 532 if (!x) 533 return false; 534 535 struct aarch64_address_info addr; 536 machine_mode mode = GET_MODE (x); 537 538 if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true)) 539 return false; 540 541 if (addr.type != ADDRESS_REG_IMM 542 && addr.type != ADDRESS_REG_WB 543 && addr.type != ADDRESS_REG_REG 544 && addr.type != ADDRESS_REG_UXTW 545 && addr.type != ADDRESS_REG_SXTW) 546 return false; 547 548 unsigned regno = REGNO (addr.base); 549 if (global_regs[regno] || fixed_regs[regno]) 550 return false; 551 552 if (addr.type == ADDRESS_REG_WB) 553 { 554 unsigned code = GET_CODE (XEXP (x, 0)); 555 556 *pre_post = true; 557 *base = addr.base; 558 559 if (code == PRE_MODIFY || code == POST_MODIFY) 560 *offset = addr.offset; 561 else 562 { 563 /*Writeback is only supported for fixed-width modes. */ 564 unsigned int_offset = GET_MODE_SIZE (mode).to_constant (); 565 566 /* For post-incremented load pairs we would increment the base twice 567 over, so make that adjustment. */ 568 if (load_pair && (code == POST_INC || code == POST_DEC)) 569 int_offset *= 2; 570 571 *offset = GEN_INT (int_offset); 572 } 573 return true; 574 } 575 else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG) 576 { 577 /* Check if the load is strided. */ 578 if (!iv_p (insn, addr.base, loop)) 579 return false; 580 581 *base = addr.base; 582 *offset = addr.offset; 583 return true; 584 } 585 586 return false; 587} 588 589 590/* Return true if INSN is a strided load in LOOP. If it is a strided load, set 591 the DEST, BASE and OFFSET. Also, if this is a pre/post increment load, set 592 PRE_POST to true. 593 594 The routine does checks on the destination of the insn and depends on 595 STRIDED_LOAD_P to check the source and fill in the BASE and OFFSET. */ 596static bool 597get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base, 598 rtx *offset, bool *pre_post, bool *ldp) 599{ 600 if (!INSN_P (insn) || recog_memoized (insn) < 0) 601 return false; 602 603 rtx pat = PATTERN (insn); 604 unsigned code = GET_CODE (pat); 605 bool load_pair = (code == PARALLEL); 606 607 /* For a load pair we need only the first base and destination 608 registers. We however need to ensure that our pre/post increment 609 offset is doubled; we do that in STRIDED_LOAD_P. */ 610 if (load_pair) 611 { 612 pat = XVECEXP (pat, 0, 0); 613 code = GET_CODE (pat); 614 } 615 616 if (code != SET) 617 return false; 618 619 rtx dest_rtx = SET_DEST (pat); 620 621 if (!REG_P (dest_rtx)) 622 return false; 623 624 unsigned regno = REGNO (dest_rtx); 625 machine_mode mode = GET_MODE (dest_rtx); 626 machine_mode inner_mode = GET_MODE_INNER (mode); 627 628 /* Falkor does not support SVE vectors. */ 629 if (!GET_MODE_SIZE (mode).is_constant ()) 630 return false; 631 632 /* Ignore vector struct or lane loads. */ 633 if (GET_MODE_SIZE (mode).to_constant () 634 != GET_MODE_SIZE (inner_mode).to_constant ()) 635 return false; 636 637 /* The largest width we want to bother with is a load of a pair of 638 quad-words. */ 639 if ((GET_MODE_SIZE (mode).to_constant () << load_pair) 640 > GET_MODE_SIZE (OImode)) 641 return false; 642 643 /* Ignore loads into the stack pointer because it is unlikely to be a 644 stream. */ 645 if (regno == SP_REGNUM) 646 return false; 647 648 if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset, 649 load_pair)) 650 { 651 *dest = dest_rtx; 652 *ldp = load_pair; 653 654 return true; 655 } 656 657 return false; 658} 659 660 661/* Return whether INSN and CAND are in the same def/use chain. */ 662static bool 663in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno) 664{ 665 struct du_chain *chain = NULL; 666 du_head_p head = NULL; 667 int i; 668 669 /* Search the chain where this instruction is (one of) the root. */ 670 operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info; 671 672 for (i = 0; i < op_info->n_chains; i++) 673 { 674 /* The register tracked by this chain does not match the 675 dest register of insn. */ 676 if (op_info->heads[i]->regno != regno) 677 continue; 678 679 head = op_info->heads[i]; 680 /* The chain was merged in another, find the new head. */ 681 if (!head->first) 682 head = regrename_chain_from_id (head->id); 683 684 bool found_insn = false, found_cand = false; 685 686 for (chain = head->first; chain; chain = chain->next_use) 687 { 688 rtx *loc = &SET_DEST (PATTERN (chain->insn)); 689 690 if (chain->loc != loc) 691 continue; 692 693 if (chain->insn == insn) 694 found_insn = true; 695 696 if (chain->insn == cand) 697 found_cand = true; 698 699 if (found_insn && found_cand) 700 return true; 701 } 702 } 703 704 return false; 705} 706 707 708/* Callback function to traverse the tag map and drop loads that have the same 709 destination and are in the same chain of occurrence. Routine always returns 710 true to allow traversal through all of TAG_MAP. */ 711bool 712single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v, 713 void *arg ATTRIBUTE_UNUSED) 714{ 715 for (int i = v->length () - 1; i>= 1; i--) 716 { 717 tag_insn_info *insn_info = (*v)[i]; 718 719 for (int j = v->length () - 2; j >= 0; j--) 720 { 721 /* Filter out destinations in the same chain. */ 722 if (in_same_chain (insn_info->insn, (*v)[j]->insn, 723 REGNO (insn_info->dest))) 724 { 725 v->ordered_remove (j); 726 i = v->length (); 727 break; 728 } 729 } 730 } 731 732 return true; 733} 734 735 736/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn 737 list INSN_INFO for tag T. */ 738bool 739dump_insn_list (const rtx &t, const insn_info_list_t &insn_info, 740 void *unused ATTRIBUTE_UNUSED) 741{ 742 gcc_assert (dump_file); 743 fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t)); 744 745 for (unsigned i = 0; i < insn_info.length (); i++) 746 dump_insn_slim (dump_file, insn_info[i]->insn); 747 748 fprintf (dump_file, "\n"); 749 750 return true; 751} 752 753 754/* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware 755 prefetcher memory tags. */ 756static void 757record_loads (tag_map_t &tag_map, struct loop *loop) 758{ 759 rtx_insn *insn; 760 basic_block *body, bb; 761 762 body = get_loop_body (loop); 763 764 for (unsigned i = 0; i < loop->num_nodes; i++) 765 { 766 bb = body[i]; 767 FOR_BB_INSNS (bb, insn) 768 { 769 rtx base = NULL_RTX; 770 rtx dest = NULL_RTX; 771 rtx offset = NULL_RTX; 772 bool writeback = false; 773 bool ldp = false; 774 775 if (!INSN_P (insn) || DEBUG_INSN_P (insn)) 776 continue; 777 778 if (get_load_info (insn, loop, &dest, &base, &offset, &writeback, 779 &ldp)) 780 { 781 tag_insn_info *i = new tag_insn_info (insn, dest, base, offset, 782 writeback, ldp); 783 rtx tag = GEN_INT (i->tag ()); 784 tag_map.get_or_insert (tag).safe_push (i); 785 } 786 } 787 } 788 789 if (dump_file) 790 { 791 fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num); 792 tag_map.traverse <void *, dump_insn_list> (NULL); 793 } 794 795 /* Try to reduce the dataset before launching into the rename attempt. Drop 796 destinations in the same collision chain that appear in the same def/use 797 chain, all as defs. These chains will move together in a rename so 798 there's no point in keeping both in there. */ 799 tag_map.traverse <void *, single_dest_per_chain> (NULL); 800} 801 802 803/* Tag collision avoidance pass for Falkor. The pass runs in two phases for 804 each loop; the first phase collects all loads that we consider as 805 interesting for renaming into a tag-indexed map of lists. The second phase 806 renames the destination register of the loads in an attempt to spread out 807 the loads into different tags. */ 808void 809execute_tag_collision_avoidance () 810{ 811 struct loop *loop; 812 813 df_set_flags (DF_RD_PRUNE_DEAD_DEFS); 814 df_chain_add_problem (DF_UD_CHAIN); 815 df_compute_regs_ever_live (true); 816 df_note_add_problem (); 817 df_analyze (); 818 df_set_flags (DF_DEFER_INSN_RESCAN); 819 820 regrename_init (true); 821 regrename_analyze (NULL); 822 823 compute_bb_for_insn (); 824 calculate_dominance_info (CDI_DOMINATORS); 825 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 826 827 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 828 { 829 tag_map_t tag_map (512); 830 831 record_loads (tag_map, loop); 832 avoid_collisions (tag_map); 833 if (dump_file) 834 { 835 fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num); 836 tag_map.traverse <void *, dump_insn_list> (NULL); 837 } 838 tag_map.traverse <void *, free_insn_info> (NULL); 839 } 840 841 loop_optimizer_finalize (); 842 free_dominance_info (CDI_DOMINATORS); 843 regrename_finish (); 844} 845 846 847const pass_data pass_data_tag_collision_avoidance = 848{ 849 RTL_PASS, /* type */ 850 "tag_collision_avoidance", /* name */ 851 OPTGROUP_NONE, /* optinfo_flags */ 852 TV_NONE, /* tv_id */ 853 0, /* properties_required */ 854 0, /* properties_provided */ 855 0, /* properties_destroyed */ 856 0, /* todo_flags_start */ 857 TODO_df_finish, /* todo_flags_finish */ 858}; 859 860 861class pass_tag_collision_avoidance : public rtl_opt_pass 862{ 863public: 864 pass_tag_collision_avoidance (gcc::context *ctxt) 865 : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt) 866 {} 867 868 /* opt_pass methods: */ 869 virtual bool gate (function *) 870 { 871 return ((aarch64_tune_params.extra_tuning_flags 872 & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS) 873 && optimize >= 2); 874 } 875 876 virtual unsigned int execute (function *) 877 { 878 execute_tag_collision_avoidance (); 879 return 0; 880 } 881 882}; // class pass_tag_collision_avoidance 883 884 885/* Create a new pass instance. */ 886rtl_opt_pass * 887make_pass_tag_collision_avoidance (gcc::context *ctxt) 888{ 889 return new pass_tag_collision_avoidance (ctxt); 890} 891