1/* Decompose multiword subregs. 2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. 3 Contributed by Richard Henderson <rth@redhat.com> 4 Ian Lance Taylor <iant@google.com> 5 6This file is part of GCC. 7 8GCC is free software; you can redistribute it and/or modify it under 9the terms of the GNU General Public License as published by the Free 10Software Foundation; either version 3, or (at your option) any later 11version. 12 13GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14WARRANTY; without even the implied warranty of MERCHANTABILITY or 15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16for more details. 17 18You should have received a copy of the GNU General Public License 19along with GCC; see the file COPYING3. If not see 20<http://www.gnu.org/licenses/>. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "machmode.h" 26#include "tm.h" 27#include "rtl.h" 28#include "tm_p.h" 29#include "timevar.h" 30#include "flags.h" 31#include "insn-config.h" 32#include "obstack.h" 33#include "basic-block.h" 34#include "recog.h" 35#include "bitmap.h" 36#include "expr.h" 37#include "except.h" 38#include "regs.h" 39#include "tree-pass.h" 40#include "df.h" 41 42#ifdef STACK_GROWS_DOWNWARD 43# undef STACK_GROWS_DOWNWARD 44# define STACK_GROWS_DOWNWARD 1 45#else 46# define STACK_GROWS_DOWNWARD 0 47#endif 48 49DEF_VEC_P (bitmap); 50DEF_VEC_ALLOC_P (bitmap,heap); 51 52/* Decompose multi-word pseudo-registers into individual 53 pseudo-registers when possible. This is possible when all the uses 54 of a multi-word register are via SUBREG, or are copies of the 55 register to another location. Breaking apart the register permits 56 more CSE and permits better register allocation. */ 57 58/* Bit N in this bitmap is set if regno N is used in a context in 59 which we can decompose it. */ 60static bitmap decomposable_context; 61 62/* Bit N in this bitmap is set if regno N is used in a context in 63 which it can not be decomposed. */ 64static bitmap non_decomposable_context; 65 66/* Bit N in the bitmap in element M of this array is set if there is a 67 copy from reg M to reg N. */ 68static VEC(bitmap,heap) *reg_copy_graph; 69 70/* Return whether X is a simple object which we can take a word_mode 71 subreg of. */ 72 73static bool 74simple_move_operand (rtx x) 75{ 76 if (GET_CODE (x) == SUBREG) 77 x = SUBREG_REG (x); 78 79 if (!OBJECT_P (x)) 80 return false; 81 82 if (GET_CODE (x) == LABEL_REF 83 || GET_CODE (x) == SYMBOL_REF 84 || GET_CODE (x) == HIGH 85 || GET_CODE (x) == CONST) 86 return false; 87 88 if (MEM_P (x) 89 && (MEM_VOLATILE_P (x) 90 || mode_dependent_address_p (XEXP (x, 0)))) 91 return false; 92 93 return true; 94} 95 96/* If INSN is a single set between two objects, return the single set. 97 Such an insn can always be decomposed. INSN should have been 98 passed to recog and extract_insn before this is called. */ 99 100static rtx 101simple_move (rtx insn) 102{ 103 rtx x; 104 rtx set; 105 enum machine_mode mode; 106 107 if (recog_data.n_operands != 2) 108 return NULL_RTX; 109 110 set = single_set (insn); 111 if (!set) 112 return NULL_RTX; 113 114 x = SET_DEST (set); 115 if (x != recog_data.operand[0] && x != recog_data.operand[1]) 116 return NULL_RTX; 117 if (!simple_move_operand (x)) 118 return NULL_RTX; 119 120 x = SET_SRC (set); 121 if (x != recog_data.operand[0] && x != recog_data.operand[1]) 122 return NULL_RTX; 123 /* For the src we can handle ASM_OPERANDS, and it is beneficial for 124 things like x86 rdtsc which returns a DImode value. */ 125 if (GET_CODE (x) != ASM_OPERANDS 126 && !simple_move_operand (x)) 127 return NULL_RTX; 128 129 /* We try to decompose in integer modes, to avoid generating 130 inefficient code copying between integer and floating point 131 registers. That means that we can't decompose if this is a 132 non-integer mode for which there is no integer mode of the same 133 size. */ 134 mode = GET_MODE (SET_SRC (set)); 135 if (!SCALAR_INT_MODE_P (mode) 136 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0) 137 == BLKmode)) 138 return NULL_RTX; 139 140 /* Reject PARTIAL_INT modes. They are used for processor specific 141 purposes and it's probably best not to tamper with them. */ 142 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 143 return NULL_RTX; 144 145 return set; 146} 147 148/* If SET is a copy from one multi-word pseudo-register to another, 149 record that in reg_copy_graph. Return whether it is such a 150 copy. */ 151 152static bool 153find_pseudo_copy (rtx set) 154{ 155 rtx dest = SET_DEST (set); 156 rtx src = SET_SRC (set); 157 unsigned int rd, rs; 158 bitmap b; 159 160 if (!REG_P (dest) || !REG_P (src)) 161 return false; 162 163 rd = REGNO (dest); 164 rs = REGNO (src); 165 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) 166 return false; 167 168 if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD) 169 return false; 170 171 b = VEC_index (bitmap, reg_copy_graph, rs); 172 if (b == NULL) 173 { 174 b = BITMAP_ALLOC (NULL); 175 VEC_replace (bitmap, reg_copy_graph, rs, b); 176 } 177 178 bitmap_set_bit (b, rd); 179 180 return true; 181} 182 183/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case 184 where they are copied to another register, add the register to 185 which they are copied to DECOMPOSABLE_CONTEXT. Use 186 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track 187 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */ 188 189static void 190propagate_pseudo_copies (void) 191{ 192 bitmap queue, propagate; 193 194 queue = BITMAP_ALLOC (NULL); 195 propagate = BITMAP_ALLOC (NULL); 196 197 bitmap_copy (queue, decomposable_context); 198 do 199 { 200 bitmap_iterator iter; 201 unsigned int i; 202 203 bitmap_clear (propagate); 204 205 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) 206 { 207 bitmap b = VEC_index (bitmap, reg_copy_graph, i); 208 if (b) 209 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); 210 } 211 212 bitmap_and_compl (queue, propagate, decomposable_context); 213 bitmap_ior_into (decomposable_context, propagate); 214 } 215 while (!bitmap_empty_p (queue)); 216 217 BITMAP_FREE (queue); 218 BITMAP_FREE (propagate); 219} 220 221/* A pointer to one of these values is passed to 222 find_decomposable_subregs via for_each_rtx. */ 223 224enum classify_move_insn 225{ 226 /* Not a simple move from one location to another. */ 227 NOT_SIMPLE_MOVE, 228 /* A simple move from one pseudo-register to another. */ 229 SIMPLE_PSEUDO_REG_MOVE, 230 /* A simple move involving a non-pseudo-register. */ 231 SIMPLE_MOVE 232}; 233 234/* This is called via for_each_rtx. If we find a SUBREG which we 235 could use to decompose a pseudo-register, set a bit in 236 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is 237 not a simple pseudo-register copy, DATA will point at the type of 238 move, and we set a bit in DECOMPOSABLE_CONTEXT or 239 NON_DECOMPOSABLE_CONTEXT as appropriate. */ 240 241static int 242find_decomposable_subregs (rtx *px, void *data) 243{ 244 enum classify_move_insn *pcmi = (enum classify_move_insn *) data; 245 rtx x = *px; 246 247 if (x == NULL_RTX) 248 return 0; 249 250 if (GET_CODE (x) == SUBREG) 251 { 252 rtx inner = SUBREG_REG (x); 253 unsigned int regno, outer_size, inner_size, outer_words, inner_words; 254 255 if (!REG_P (inner)) 256 return 0; 257 258 regno = REGNO (inner); 259 if (HARD_REGISTER_NUM_P (regno)) 260 return -1; 261 262 outer_size = GET_MODE_SIZE (GET_MODE (x)); 263 inner_size = GET_MODE_SIZE (GET_MODE (inner)); 264 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 265 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 266 267 /* We only try to decompose single word subregs of multi-word 268 registers. When we find one, we return -1 to avoid iterating 269 over the inner register. 270 271 ??? This doesn't allow, e.g., DImode subregs of TImode values 272 on 32-bit targets. We would need to record the way the 273 pseudo-register was used, and only decompose if all the uses 274 were the same number and size of pieces. Hopefully this 275 doesn't happen much. */ 276 277 if (outer_words == 1 && inner_words > 1) 278 { 279 bitmap_set_bit (decomposable_context, regno); 280 return -1; 281 } 282 283 /* If this is a cast from one mode to another, where the modes 284 have the same size, and they are not tieable, then mark this 285 register as non-decomposable. If we decompose it we are 286 likely to mess up whatever the backend is trying to do. */ 287 if (outer_words > 1 288 && outer_size == inner_size 289 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner))) 290 { 291 bitmap_set_bit (non_decomposable_context, regno); 292 return -1; 293 } 294 } 295 else if (REG_P (x)) 296 { 297 unsigned int regno; 298 299 /* We will see an outer SUBREG before we see the inner REG, so 300 when we see a plain REG here it means a direct reference to 301 the register. 302 303 If this is not a simple copy from one location to another, 304 then we can not decompose this register. If this is a simple 305 copy from one pseudo-register to another, and the mode is right 306 then we mark the register as decomposable. 307 Otherwise we don't say anything about this register -- 308 it could be decomposed, but whether that would be 309 profitable depends upon how it is used elsewhere. 310 311 We only set bits in the bitmap for multi-word 312 pseudo-registers, since those are the only ones we care about 313 and it keeps the size of the bitmaps down. */ 314 315 regno = REGNO (x); 316 if (!HARD_REGISTER_NUM_P (regno) 317 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) 318 { 319 switch (*pcmi) 320 { 321 case NOT_SIMPLE_MOVE: 322 bitmap_set_bit (non_decomposable_context, regno); 323 break; 324 case SIMPLE_PSEUDO_REG_MOVE: 325 if (MODES_TIEABLE_P (GET_MODE (x), word_mode)) 326 bitmap_set_bit (decomposable_context, regno); 327 break; 328 case SIMPLE_MOVE: 329 break; 330 default: 331 gcc_unreachable (); 332 } 333 } 334 } 335 else if (MEM_P (x)) 336 { 337 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE; 338 339 /* Any registers used in a MEM do not participate in a 340 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE. Do our own recursion 341 here, and return -1 to block the parent's recursion. */ 342 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem); 343 return -1; 344 } 345 346 return 0; 347} 348 349/* Decompose REGNO into word-sized components. We smash the REG node 350 in place. This ensures that (1) something goes wrong quickly if we 351 fail to make some replacement, and (2) the debug information inside 352 the symbol table is automatically kept up to date. */ 353 354static void 355decompose_register (unsigned int regno) 356{ 357 rtx reg; 358 unsigned int words, i; 359 rtvec v; 360 361 reg = regno_reg_rtx[regno]; 362 363 regno_reg_rtx[regno] = NULL_RTX; 364 365 words = GET_MODE_SIZE (GET_MODE (reg)); 366 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 367 368 v = rtvec_alloc (words); 369 for (i = 0; i < words; ++i) 370 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD); 371 372 PUT_CODE (reg, CONCATN); 373 XVEC (reg, 0) = v; 374 375 if (dump_file) 376 { 377 fprintf (dump_file, "; Splitting reg %u ->", regno); 378 for (i = 0; i < words; ++i) 379 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i))); 380 fputc ('\n', dump_file); 381 } 382} 383 384/* Get a SUBREG of a CONCATN. */ 385 386static rtx 387simplify_subreg_concatn (enum machine_mode outermode, rtx op, 388 unsigned int byte) 389{ 390 unsigned int inner_size; 391 enum machine_mode innermode; 392 rtx part; 393 unsigned int final_offset; 394 395 gcc_assert (GET_CODE (op) == CONCATN); 396 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); 397 398 innermode = GET_MODE (op); 399 gcc_assert (byte < GET_MODE_SIZE (innermode)); 400 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode)); 401 402 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); 403 part = XVECEXP (op, 0, byte / inner_size); 404 final_offset = byte % inner_size; 405 if (final_offset + GET_MODE_SIZE (outermode) > inner_size) 406 return NULL_RTX; 407 408 return simplify_gen_subreg (outermode, part, GET_MODE (part), final_offset); 409} 410 411/* Wrapper around simplify_gen_subreg which handles CONCATN. */ 412 413static rtx 414simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op, 415 enum machine_mode innermode, unsigned int byte) 416{ 417 rtx ret; 418 419 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. 420 If OP is a SUBREG of a CONCATN, then it must be a simple mode 421 change with the same size and offset 0, or it must extract a 422 part. We shouldn't see anything else here. */ 423 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN) 424 { 425 rtx op2; 426 427 if ((GET_MODE_SIZE (GET_MODE (op)) 428 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))) 429 && SUBREG_BYTE (op) == 0) 430 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op), 431 GET_MODE (SUBREG_REG (op)), byte); 432 433 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), 434 SUBREG_BYTE (op)); 435 if (op2 == NULL_RTX) 436 { 437 /* We don't handle paradoxical subregs here. */ 438 gcc_assert (GET_MODE_SIZE (outermode) 439 <= GET_MODE_SIZE (GET_MODE (op))); 440 gcc_assert (GET_MODE_SIZE (GET_MODE (op)) 441 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))); 442 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), 443 byte + SUBREG_BYTE (op)); 444 gcc_assert (op2 != NULL_RTX); 445 return op2; 446 } 447 448 op = op2; 449 gcc_assert (op != NULL_RTX); 450 gcc_assert (innermode == GET_MODE (op)); 451 } 452 453 if (GET_CODE (op) == CONCATN) 454 return simplify_subreg_concatn (outermode, op, byte); 455 456 ret = simplify_gen_subreg (outermode, op, innermode, byte); 457 458 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then 459 resolve_simple_move will ask for the high part of the paradoxical 460 subreg, which does not have a value. Just return a zero. */ 461 if (ret == NULL_RTX 462 && GET_CODE (op) == SUBREG 463 && SUBREG_BYTE (op) == 0 464 && (GET_MODE_SIZE (innermode) 465 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))) 466 return CONST0_RTX (outermode); 467 468 gcc_assert (ret != NULL_RTX); 469 return ret; 470} 471 472/* Return whether we should resolve X into the registers into which it 473 was decomposed. */ 474 475static bool 476resolve_reg_p (rtx x) 477{ 478 return GET_CODE (x) == CONCATN; 479} 480 481/* Return whether X is a SUBREG of a register which we need to 482 resolve. */ 483 484static bool 485resolve_subreg_p (rtx x) 486{ 487 if (GET_CODE (x) != SUBREG) 488 return false; 489 return resolve_reg_p (SUBREG_REG (x)); 490} 491 492/* This is called via for_each_rtx. Look for SUBREGs which need to be 493 decomposed. */ 494 495static int 496resolve_subreg_use (rtx *px, void *data) 497{ 498 rtx insn = (rtx) data; 499 rtx x = *px; 500 501 if (x == NULL_RTX) 502 return 0; 503 504 if (resolve_subreg_p (x)) 505 { 506 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), 507 SUBREG_BYTE (x)); 508 509 /* It is possible for a note to contain a reference which we can 510 decompose. In this case, return 1 to the caller to indicate 511 that the note must be removed. */ 512 if (!x) 513 { 514 gcc_assert (!insn); 515 return 1; 516 } 517 518 validate_change (insn, px, x, 1); 519 return -1; 520 } 521 522 if (resolve_reg_p (x)) 523 { 524 /* Return 1 to the caller to indicate that we found a direct 525 reference to a register which is being decomposed. This can 526 happen inside notes, multiword shift or zero-extend 527 instructions. */ 528 return 1; 529 } 530 531 return 0; 532} 533 534/* This is called via for_each_rtx. Look for SUBREGs which can be 535 decomposed and decomposed REGs that need copying. */ 536 537static int 538adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED) 539{ 540 rtx x = *px; 541 542 if (x == NULL_RTX) 543 return 0; 544 545 if (resolve_subreg_p (x)) 546 { 547 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), 548 SUBREG_BYTE (x)); 549 550 if (x) 551 *px = x; 552 else 553 x = copy_rtx (*px); 554 } 555 556 if (resolve_reg_p (x)) 557 *px = copy_rtx (x); 558 559 return 0; 560} 561 562/* Resolve any decomposed registers which appear in register notes on 563 INSN. */ 564 565static void 566resolve_reg_notes (rtx insn) 567{ 568 rtx *pnote, note; 569 570 note = find_reg_equal_equiv_note (insn); 571 if (note) 572 { 573 int old_count = num_validated_changes (); 574 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL)) 575 remove_note (insn, note); 576 else 577 if (old_count != num_validated_changes ()) 578 df_notes_rescan (insn); 579 } 580 581 pnote = ®_NOTES (insn); 582 while (*pnote != NULL_RTX) 583 { 584 bool del = false; 585 586 note = *pnote; 587 switch (REG_NOTE_KIND (note)) 588 { 589 case REG_DEAD: 590 case REG_UNUSED: 591 if (resolve_reg_p (XEXP (note, 0))) 592 del = true; 593 break; 594 595 default: 596 break; 597 } 598 599 if (del) 600 *pnote = XEXP (note, 1); 601 else 602 pnote = &XEXP (note, 1); 603 } 604} 605 606/* Return whether X can be decomposed into subwords. */ 607 608static bool 609can_decompose_p (rtx x) 610{ 611 if (REG_P (x)) 612 { 613 unsigned int regno = REGNO (x); 614 615 if (HARD_REGISTER_NUM_P (regno)) 616 return (validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD) 617 && HARD_REGNO_MODE_OK (regno, word_mode)); 618 else 619 return !bitmap_bit_p (non_decomposable_context, regno); 620 } 621 622 return true; 623} 624 625/* Decompose the registers used in a simple move SET within INSN. If 626 we don't change anything, return INSN, otherwise return the start 627 of the sequence of moves. */ 628 629static rtx 630resolve_simple_move (rtx set, rtx insn) 631{ 632 rtx src, dest, real_dest, insns; 633 enum machine_mode orig_mode, dest_mode; 634 unsigned int words; 635 bool pushing; 636 637 src = SET_SRC (set); 638 dest = SET_DEST (set); 639 orig_mode = GET_MODE (dest); 640 641 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 642 if (words <= 1) 643 return insn; 644 645 start_sequence (); 646 647 /* We have to handle copying from a SUBREG of a decomposed reg where 648 the SUBREG is larger than word size. Rather than assume that we 649 can take a word_mode SUBREG of the destination, we copy to a new 650 register and then copy that to the destination. */ 651 652 real_dest = NULL_RTX; 653 654 if (GET_CODE (src) == SUBREG 655 && resolve_reg_p (SUBREG_REG (src)) 656 && (SUBREG_BYTE (src) != 0 657 || (GET_MODE_SIZE (orig_mode) 658 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))))) 659 { 660 real_dest = dest; 661 dest = gen_reg_rtx (orig_mode); 662 if (REG_P (real_dest)) 663 REG_ATTRS (dest) = REG_ATTRS (real_dest); 664 } 665 666 /* Similarly if we are copying to a SUBREG of a decomposed reg where 667 the SUBREG is larger than word size. */ 668 669 if (GET_CODE (dest) == SUBREG 670 && resolve_reg_p (SUBREG_REG (dest)) 671 && (SUBREG_BYTE (dest) != 0 672 || (GET_MODE_SIZE (orig_mode) 673 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))) 674 { 675 rtx reg, minsn, smove; 676 677 reg = gen_reg_rtx (orig_mode); 678 minsn = emit_move_insn (reg, src); 679 smove = single_set (minsn); 680 gcc_assert (smove != NULL_RTX); 681 resolve_simple_move (smove, minsn); 682 src = reg; 683 } 684 685 /* If we didn't have any big SUBREGS of decomposed registers, and 686 neither side of the move is a register we are decomposing, then 687 we don't have to do anything here. */ 688 689 if (src == SET_SRC (set) 690 && dest == SET_DEST (set) 691 && !resolve_reg_p (src) 692 && !resolve_subreg_p (src) 693 && !resolve_reg_p (dest) 694 && !resolve_subreg_p (dest)) 695 { 696 end_sequence (); 697 return insn; 698 } 699 700 /* It's possible for the code to use a subreg of a decomposed 701 register while forming an address. We need to handle that before 702 passing the address to emit_move_insn. We pass NULL_RTX as the 703 insn parameter to resolve_subreg_use because we can not validate 704 the insn yet. */ 705 if (MEM_P (src) || MEM_P (dest)) 706 { 707 int acg; 708 709 if (MEM_P (src)) 710 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX); 711 if (MEM_P (dest)) 712 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX); 713 acg = apply_change_group (); 714 gcc_assert (acg); 715 } 716 717 /* If SRC is a register which we can't decompose, or has side 718 effects, we need to move via a temporary register. */ 719 720 if (!can_decompose_p (src) 721 || side_effects_p (src) 722 || GET_CODE (src) == ASM_OPERANDS) 723 { 724 rtx reg; 725 726 reg = gen_reg_rtx (orig_mode); 727 emit_move_insn (reg, src); 728 src = reg; 729 } 730 731 /* If DEST is a register which we can't decompose, or has side 732 effects, we need to first move to a temporary register. We 733 handle the common case of pushing an operand directly. We also 734 go through a temporary register if it holds a floating point 735 value. This gives us better code on systems which can't move 736 data easily between integer and floating point registers. */ 737 738 dest_mode = orig_mode; 739 pushing = push_operand (dest, dest_mode); 740 if (!can_decompose_p (dest) 741 || (side_effects_p (dest) && !pushing) 742 || (!SCALAR_INT_MODE_P (dest_mode) 743 && !resolve_reg_p (dest) 744 && !resolve_subreg_p (dest))) 745 { 746 if (real_dest == NULL_RTX) 747 real_dest = dest; 748 if (!SCALAR_INT_MODE_P (dest_mode)) 749 { 750 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT, 751 MODE_INT, 0); 752 gcc_assert (dest_mode != BLKmode); 753 } 754 dest = gen_reg_rtx (dest_mode); 755 if (REG_P (real_dest)) 756 REG_ATTRS (dest) = REG_ATTRS (real_dest); 757 } 758 759 if (pushing) 760 { 761 unsigned int i, j, jinc; 762 763 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0); 764 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY); 765 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY); 766 767 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD) 768 { 769 j = 0; 770 jinc = 1; 771 } 772 else 773 { 774 j = words - 1; 775 jinc = -1; 776 } 777 778 for (i = 0; i < words; ++i, j += jinc) 779 { 780 rtx temp; 781 782 temp = copy_rtx (XEXP (dest, 0)); 783 temp = adjust_automodify_address_nv (dest, word_mode, temp, 784 j * UNITS_PER_WORD); 785 emit_move_insn (temp, 786 simplify_gen_subreg_concatn (word_mode, src, 787 orig_mode, 788 j * UNITS_PER_WORD)); 789 } 790 } 791 else 792 { 793 unsigned int i; 794 795 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest))) 796 emit_clobber (dest); 797 798 for (i = 0; i < words; ++i) 799 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest, 800 dest_mode, 801 i * UNITS_PER_WORD), 802 simplify_gen_subreg_concatn (word_mode, src, 803 orig_mode, 804 i * UNITS_PER_WORD)); 805 } 806 807 if (real_dest != NULL_RTX) 808 { 809 rtx mdest, minsn, smove; 810 811 if (dest_mode == orig_mode) 812 mdest = dest; 813 else 814 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0); 815 minsn = emit_move_insn (real_dest, mdest); 816 817 smove = single_set (minsn); 818 gcc_assert (smove != NULL_RTX); 819 820 resolve_simple_move (smove, minsn); 821 } 822 823 insns = get_insns (); 824 end_sequence (); 825 826 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX); 827 828 emit_insn_before (insns, insn); 829 830 delete_insn (insn); 831 832 return insns; 833} 834 835/* Change a CLOBBER of a decomposed register into a CLOBBER of the 836 component registers. Return whether we changed something. */ 837 838static bool 839resolve_clobber (rtx pat, rtx insn) 840{ 841 rtx reg; 842 enum machine_mode orig_mode; 843 unsigned int words, i; 844 int ret; 845 846 reg = XEXP (pat, 0); 847 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg)) 848 return false; 849 850 orig_mode = GET_MODE (reg); 851 words = GET_MODE_SIZE (orig_mode); 852 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 853 854 ret = validate_change (NULL_RTX, &XEXP (pat, 0), 855 simplify_gen_subreg_concatn (word_mode, reg, 856 orig_mode, 0), 857 0); 858 df_insn_rescan (insn); 859 gcc_assert (ret != 0); 860 861 for (i = words - 1; i > 0; --i) 862 { 863 rtx x; 864 865 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode, 866 i * UNITS_PER_WORD); 867 x = gen_rtx_CLOBBER (VOIDmode, x); 868 emit_insn_after (x, insn); 869 } 870 871 resolve_reg_notes (insn); 872 873 return true; 874} 875 876/* A USE of a decomposed register is no longer meaningful. Return 877 whether we changed something. */ 878 879static bool 880resolve_use (rtx pat, rtx insn) 881{ 882 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0))) 883 { 884 delete_insn (insn); 885 return true; 886 } 887 888 resolve_reg_notes (insn); 889 890 return false; 891} 892 893/* A VAR_LOCATION can be simplified. */ 894 895static void 896resolve_debug (rtx insn) 897{ 898 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX); 899 900 df_insn_rescan (insn); 901 902 resolve_reg_notes (insn); 903} 904 905/* Checks if INSN is a decomposable multiword-shift or zero-extend and 906 sets the decomposable_context bitmap accordingly. A non-zero value 907 is returned if a decomposable insn has been found. */ 908 909static int 910find_decomposable_shift_zext (rtx insn) 911{ 912 rtx set; 913 rtx op; 914 rtx op_operand; 915 916 set = single_set (insn); 917 if (!set) 918 return 0; 919 920 op = SET_SRC (set); 921 if (GET_CODE (op) != ASHIFT 922 && GET_CODE (op) != LSHIFTRT 923 && GET_CODE (op) != ZERO_EXTEND) 924 return 0; 925 926 op_operand = XEXP (op, 0); 927 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) 928 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) 929 || HARD_REGISTER_NUM_P (REGNO (op_operand)) 930 || !SCALAR_INT_MODE_P (GET_MODE (op))) 931 return 0; 932 933 if (GET_CODE (op) == ZERO_EXTEND) 934 { 935 if (GET_MODE (op_operand) != word_mode 936 || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD) 937 return 0; 938 } 939 else /* left or right shift */ 940 { 941 if (!CONST_INT_P (XEXP (op, 1)) 942 || INTVAL (XEXP (op, 1)) < BITS_PER_WORD 943 || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD) 944 return 0; 945 } 946 947 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); 948 949 if (GET_CODE (op) != ZERO_EXTEND) 950 bitmap_set_bit (decomposable_context, REGNO (op_operand)); 951 952 return 1; 953} 954 955/* Decompose a more than word wide shift (in INSN) of a multiword 956 pseudo or a multiword zero-extend of a wordmode pseudo into a move 957 and 'set to zero' insn. Return a pointer to the new insn when a 958 replacement was done. */ 959 960static rtx 961resolve_shift_zext (rtx insn) 962{ 963 rtx set; 964 rtx op; 965 rtx op_operand; 966 rtx insns; 967 rtx src_reg, dest_reg, dest_zero; 968 int src_reg_num, dest_reg_num, offset1, offset2, src_offset; 969 970 set = single_set (insn); 971 if (!set) 972 return NULL_RTX; 973 974 op = SET_SRC (set); 975 if (GET_CODE (op) != ASHIFT 976 && GET_CODE (op) != LSHIFTRT 977 && GET_CODE (op) != ZERO_EXTEND) 978 return NULL_RTX; 979 980 op_operand = XEXP (op, 0); 981 982 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand)) 983 return NULL_RTX; 984 985 /* src_reg_num is the number of the word mode register which we 986 are operating on. For a left shift and a zero_extend on little 987 endian machines this is register 0. */ 988 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0; 989 990 if (WORDS_BIG_ENDIAN 991 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD) 992 src_reg_num = 1 - src_reg_num; 993 994 if (GET_CODE (op) == ZERO_EXTEND) 995 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0; 996 else 997 dest_reg_num = 1 - src_reg_num; 998 999 offset1 = UNITS_PER_WORD * dest_reg_num; 1000 offset2 = UNITS_PER_WORD * (1 - dest_reg_num); 1001 src_offset = UNITS_PER_WORD * src_reg_num; 1002 1003 if (WORDS_BIG_ENDIAN != BYTES_BIG_ENDIAN) 1004 { 1005 offset1 += UNITS_PER_WORD - 1; 1006 offset2 += UNITS_PER_WORD - 1; 1007 src_offset += UNITS_PER_WORD - 1; 1008 } 1009 1010 start_sequence (); 1011 1012 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), 1013 GET_MODE (SET_DEST (set)), 1014 offset1); 1015 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), 1016 GET_MODE (SET_DEST (set)), 1017 offset2); 1018 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand, 1019 GET_MODE (op_operand), 1020 src_offset); 1021 if (GET_CODE (op) != ZERO_EXTEND) 1022 { 1023 int shift_count = INTVAL (XEXP (op, 1)); 1024 if (shift_count > BITS_PER_WORD) 1025 src_reg = expand_shift (GET_CODE (op) == ASHIFT ? 1026 LSHIFT_EXPR : RSHIFT_EXPR, 1027 word_mode, src_reg, 1028 build_int_cst (NULL_TREE, 1029 shift_count - BITS_PER_WORD), 1030 dest_reg, 1); 1031 } 1032 1033 if (dest_reg != src_reg) 1034 emit_move_insn (dest_reg, src_reg); 1035 emit_move_insn (dest_zero, CONST0_RTX (word_mode)); 1036 insns = get_insns (); 1037 1038 end_sequence (); 1039 1040 emit_insn_before (insns, insn); 1041 1042 if (dump_file) 1043 { 1044 rtx in; 1045 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn)); 1046 for (in = insns; in != insn; in = NEXT_INSN (in)) 1047 fprintf (dump_file, "%d ", INSN_UID (in)); 1048 fprintf (dump_file, "\n"); 1049 } 1050 1051 delete_insn (insn); 1052 return insns; 1053} 1054 1055/* Look for registers which are always accessed via word-sized SUBREGs 1056 or via copies. Decompose these registers into several word-sized 1057 pseudo-registers. */ 1058 1059static void 1060decompose_multiword_subregs (void) 1061{ 1062 unsigned int max; 1063 basic_block bb; 1064 1065 if (df) 1066 df_set_flags (DF_DEFER_INSN_RESCAN); 1067 1068 max = max_reg_num (); 1069 1070 /* First see if there are any multi-word pseudo-registers. If there 1071 aren't, there is nothing we can do. This should speed up this 1072 pass in the normal case, since it should be faster than scanning 1073 all the insns. */ 1074 { 1075 unsigned int i; 1076 1077 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i) 1078 { 1079 if (regno_reg_rtx[i] != NULL 1080 && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD) 1081 break; 1082 } 1083 if (i == max) 1084 return; 1085 } 1086 1087 /* FIXME: When the dataflow branch is merged, we can change this 1088 code to look for each multi-word pseudo-register and to find each 1089 insn which sets or uses that register. That should be faster 1090 than scanning all the insns. */ 1091 1092 decomposable_context = BITMAP_ALLOC (NULL); 1093 non_decomposable_context = BITMAP_ALLOC (NULL); 1094 1095 reg_copy_graph = VEC_alloc (bitmap, heap, max); 1096 VEC_safe_grow (bitmap, heap, reg_copy_graph, max); 1097 memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max); 1098 1099 FOR_EACH_BB (bb) 1100 { 1101 rtx insn; 1102 1103 FOR_BB_INSNS (bb, insn) 1104 { 1105 rtx set; 1106 enum classify_move_insn cmi; 1107 int i, n; 1108 1109 if (!INSN_P (insn) 1110 || GET_CODE (PATTERN (insn)) == CLOBBER 1111 || GET_CODE (PATTERN (insn)) == USE) 1112 continue; 1113 1114 if (find_decomposable_shift_zext (insn)) 1115 continue; 1116 1117 recog_memoized (insn); 1118 extract_insn (insn); 1119 1120 set = simple_move (insn); 1121 1122 if (!set) 1123 cmi = NOT_SIMPLE_MOVE; 1124 else 1125 { 1126 if (find_pseudo_copy (set)) 1127 cmi = SIMPLE_PSEUDO_REG_MOVE; 1128 else 1129 cmi = SIMPLE_MOVE; 1130 } 1131 1132 n = recog_data.n_operands; 1133 for (i = 0; i < n; ++i) 1134 { 1135 for_each_rtx (&recog_data.operand[i], 1136 find_decomposable_subregs, 1137 &cmi); 1138 1139 /* We handle ASM_OPERANDS as a special case to support 1140 things like x86 rdtsc which returns a DImode value. 1141 We can decompose the output, which will certainly be 1142 operand 0, but not the inputs. */ 1143 1144 if (cmi == SIMPLE_MOVE 1145 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS) 1146 { 1147 gcc_assert (i == 0); 1148 cmi = NOT_SIMPLE_MOVE; 1149 } 1150 } 1151 } 1152 } 1153 1154 bitmap_and_compl_into (decomposable_context, non_decomposable_context); 1155 if (!bitmap_empty_p (decomposable_context)) 1156 { 1157 sbitmap sub_blocks; 1158 unsigned int i; 1159 sbitmap_iterator sbi; 1160 bitmap_iterator iter; 1161 unsigned int regno; 1162 1163 propagate_pseudo_copies (); 1164 1165 sub_blocks = sbitmap_alloc (last_basic_block); 1166 sbitmap_zero (sub_blocks); 1167 1168 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter) 1169 decompose_register (regno); 1170 1171 FOR_EACH_BB (bb) 1172 { 1173 rtx insn; 1174 1175 FOR_BB_INSNS (bb, insn) 1176 { 1177 rtx pat; 1178 1179 if (!INSN_P (insn)) 1180 continue; 1181 1182 pat = PATTERN (insn); 1183 if (GET_CODE (pat) == CLOBBER) 1184 resolve_clobber (pat, insn); 1185 else if (GET_CODE (pat) == USE) 1186 resolve_use (pat, insn); 1187 else if (DEBUG_INSN_P (insn)) 1188 resolve_debug (insn); 1189 else 1190 { 1191 rtx set; 1192 int i; 1193 1194 recog_memoized (insn); 1195 extract_insn (insn); 1196 1197 set = simple_move (insn); 1198 if (set) 1199 { 1200 rtx orig_insn = insn; 1201 bool cfi = control_flow_insn_p (insn); 1202 1203 /* We can end up splitting loads to multi-word pseudos 1204 into separate loads to machine word size pseudos. 1205 When this happens, we first had one load that can 1206 throw, and after resolve_simple_move we'll have a 1207 bunch of loads (at least two). All those loads may 1208 trap if we can have non-call exceptions, so they 1209 all will end the current basic block. We split the 1210 block after the outer loop over all insns, but we 1211 make sure here that we will be able to split the 1212 basic block and still produce the correct control 1213 flow graph for it. */ 1214 gcc_assert (!cfi 1215 || (flag_non_call_exceptions 1216 && can_throw_internal (insn))); 1217 1218 insn = resolve_simple_move (set, insn); 1219 if (insn != orig_insn) 1220 { 1221 recog_memoized (insn); 1222 extract_insn (insn); 1223 1224 if (cfi) 1225 SET_BIT (sub_blocks, bb->index); 1226 } 1227 } 1228 else 1229 { 1230 rtx decomposed_shift; 1231 1232 decomposed_shift = resolve_shift_zext (insn); 1233 if (decomposed_shift != NULL_RTX) 1234 { 1235 insn = decomposed_shift; 1236 recog_memoized (insn); 1237 extract_insn (insn); 1238 } 1239 } 1240 1241 for (i = recog_data.n_operands - 1; i >= 0; --i) 1242 for_each_rtx (recog_data.operand_loc[i], 1243 resolve_subreg_use, 1244 insn); 1245 1246 resolve_reg_notes (insn); 1247 1248 if (num_validated_changes () > 0) 1249 { 1250 for (i = recog_data.n_dups - 1; i >= 0; --i) 1251 { 1252 rtx *pl = recog_data.dup_loc[i]; 1253 int dup_num = recog_data.dup_num[i]; 1254 rtx *px = recog_data.operand_loc[dup_num]; 1255 1256 validate_unshare_change (insn, pl, *px, 1); 1257 } 1258 1259 i = apply_change_group (); 1260 gcc_assert (i); 1261 } 1262 } 1263 } 1264 } 1265 1266 /* If we had insns to split that caused control flow insns in the middle 1267 of a basic block, split those blocks now. Note that we only handle 1268 the case where splitting a load has caused multiple possibly trapping 1269 loads to appear. */ 1270 EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi) 1271 { 1272 rtx insn, end; 1273 edge fallthru; 1274 1275 bb = BASIC_BLOCK (i); 1276 insn = BB_HEAD (bb); 1277 end = BB_END (bb); 1278 1279 while (insn != end) 1280 { 1281 if (control_flow_insn_p (insn)) 1282 { 1283 /* Split the block after insn. There will be a fallthru 1284 edge, which is OK so we keep it. We have to create the 1285 exception edges ourselves. */ 1286 fallthru = split_block (bb, insn); 1287 rtl_make_eh_edge (NULL, bb, BB_END (bb)); 1288 bb = fallthru->dest; 1289 insn = BB_HEAD (bb); 1290 } 1291 else 1292 insn = NEXT_INSN (insn); 1293 } 1294 } 1295 1296 sbitmap_free (sub_blocks); 1297 } 1298 1299 { 1300 unsigned int i; 1301 bitmap b; 1302 1303 for (i = 0; VEC_iterate (bitmap, reg_copy_graph, i, b); ++i) 1304 if (b) 1305 BITMAP_FREE (b); 1306 } 1307 1308 VEC_free (bitmap, heap, reg_copy_graph); 1309 1310 BITMAP_FREE (decomposable_context); 1311 BITMAP_FREE (non_decomposable_context); 1312} 1313 1314/* Gate function for lower subreg pass. */ 1315 1316static bool 1317gate_handle_lower_subreg (void) 1318{ 1319 return flag_split_wide_types != 0; 1320} 1321 1322/* Implement first lower subreg pass. */ 1323 1324static unsigned int 1325rest_of_handle_lower_subreg (void) 1326{ 1327 decompose_multiword_subregs (); 1328 return 0; 1329} 1330 1331/* Implement second lower subreg pass. */ 1332 1333static unsigned int 1334rest_of_handle_lower_subreg2 (void) 1335{ 1336 decompose_multiword_subregs (); 1337 return 0; 1338} 1339 1340struct rtl_opt_pass pass_lower_subreg = 1341{ 1342 { 1343 RTL_PASS, 1344 "subreg1", /* name */ 1345 gate_handle_lower_subreg, /* gate */ 1346 rest_of_handle_lower_subreg, /* execute */ 1347 NULL, /* sub */ 1348 NULL, /* next */ 1349 0, /* static_pass_number */ 1350 TV_LOWER_SUBREG, /* tv_id */ 1351 0, /* properties_required */ 1352 0, /* properties_provided */ 1353 0, /* properties_destroyed */ 1354 0, /* todo_flags_start */ 1355 TODO_dump_func | 1356 TODO_ggc_collect | 1357 TODO_verify_flow /* todo_flags_finish */ 1358 } 1359}; 1360 1361struct rtl_opt_pass pass_lower_subreg2 = 1362{ 1363 { 1364 RTL_PASS, 1365 "subreg2", /* name */ 1366 gate_handle_lower_subreg, /* gate */ 1367 rest_of_handle_lower_subreg2, /* execute */ 1368 NULL, /* sub */ 1369 NULL, /* next */ 1370 0, /* static_pass_number */ 1371 TV_LOWER_SUBREG, /* tv_id */ 1372 0, /* properties_required */ 1373 0, /* properties_provided */ 1374 0, /* properties_destroyed */ 1375 0, /* todo_flags_start */ 1376 TODO_df_finish | TODO_verify_rtl_sharing | 1377 TODO_dump_func | 1378 TODO_ggc_collect | 1379 TODO_verify_flow /* todo_flags_finish */ 1380 } 1381}; 1382