/* Copyright (C) 2006-2015 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "output.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "params.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "tm-constrs.h"
#include "sbitmap.h"
#include "df.h"
#include "ddg.h"
#include "timevar.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "builtins.h"
#include "rtl-iter.h"

/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type.
     (Implemented with V16QI_type_node)  */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_VOID,
  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];

struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},       /* SPU_BTI_7     */
  {-0x40ll, 0x3fll},       /* SPU_BTI_S7    */
  {0ll, 0x7fll},           /* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},     /* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},   /* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},         /* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},   /* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},   /* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
  {0ll, 0xffffll},         /* SPU_BTI_U16   */
  {0ll, 0x3ffffll},        /* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},        /* SPU_BTI_U18   */
};


/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);

/* Which instruction set architecture to use. */
int spu_arch;
/* Which cpu are we tuning for. */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.
 */
int spu_hint_dist = (8*4) - (2*4);

enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,   /* constant pool */
  IC_IL1,    /* one il* instruction */
  IC_IL2,    /* both ilhu and iohl instructions */
  IC_IL1s,   /* one il* instruction */
  IC_IL2s,   /* both ilhu and iohl instructions */
  IC_FSMBI,  /* the fsmbi instruction */
  IC_CPAT,   /* one of the c*d instructions */
  IC_FSMBI2  /* fsmbi plus 1 other instruction */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						machine_mode mode);

/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)


/* Define the structure for the machine field in struct function. */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses. */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'. */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Implement TARGET_OPTION_OVERRIDE. */
static void
spu_option_override (void)
{
  /* Set up function hooks. */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be completely peeled at -O3.  For SPU it is more
     important to keep code small by default. */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level. */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
        spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
        spu_arch = PROCESSOR_CELLEDP;
      else
        error ("bad value (%s) for -march= switch", spu_arch_string);
    }

  /* Determine processor to tune for. */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
        spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
        spu_tune = PROCESSOR_CELLEDP;
      else
        error ("bad value (%s) for -mtune= switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture. */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround. */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* True if MODE is valid for the target.
By "valid", we mean able to 322 be manipulated in non-trivial ways. In particular, this means all 323 the arithmetic is supported. */ 324static bool 325spu_scalar_mode_supported_p (machine_mode mode) 326{ 327 switch (mode) 328 { 329 case QImode: 330 case HImode: 331 case SImode: 332 case SFmode: 333 case DImode: 334 case TImode: 335 case DFmode: 336 return true; 337 338 default: 339 return false; 340 } 341} 342 343/* Similarly for vector modes. "Supported" here is less strict. At 344 least some operations are supported; need to check optabs or builtins 345 for further details. */ 346static bool 347spu_vector_mode_supported_p (machine_mode mode) 348{ 349 switch (mode) 350 { 351 case V16QImode: 352 case V8HImode: 353 case V4SImode: 354 case V2DImode: 355 case V4SFmode: 356 case V2DFmode: 357 return true; 358 359 default: 360 return false; 361 } 362} 363 364/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the 365 least significant bytes of the outer mode. This function returns 366 TRUE for the SUBREG's where this is correct. */ 367int 368valid_subreg (rtx op) 369{ 370 machine_mode om = GET_MODE (op); 371 machine_mode im = GET_MODE (SUBREG_REG (op)); 372 return om != VOIDmode && im != VOIDmode 373 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om) 374 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4) 375 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16)); 376} 377 378/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off 379 and adjust the start offset. */ 380static rtx 381adjust_operand (rtx op, HOST_WIDE_INT * start) 382{ 383 machine_mode mode; 384 int op_size; 385 /* Strip any paradoxical SUBREG. */ 386 if (GET_CODE (op) == SUBREG 387 && (GET_MODE_BITSIZE (GET_MODE (op)) 388 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op))))) 389 { 390 if (start) 391 *start -= 392 GET_MODE_BITSIZE (GET_MODE (op)) - 393 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op))); 394 op = SUBREG_REG (op); 395 } 396 /* If it is smaller than SI, assure a SUBREG */ 397 op_size = GET_MODE_BITSIZE (GET_MODE (op)); 398 if (op_size < 32) 399 { 400 if (start) 401 *start += 32 - op_size; 402 op_size = 32; 403 } 404 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */ 405 mode = mode_for_size (op_size, MODE_INT, 0); 406 if (mode != GET_MODE (op)) 407 op = gen_rtx_SUBREG (mode, op, 0); 408 return op; 409} 410 411void 412spu_expand_extv (rtx ops[], int unsignedp) 413{ 414 rtx dst = ops[0], src = ops[1]; 415 HOST_WIDE_INT width = INTVAL (ops[2]); 416 HOST_WIDE_INT start = INTVAL (ops[3]); 417 HOST_WIDE_INT align_mask; 418 rtx s0, s1, mask, r0; 419 420 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode); 421 422 if (MEM_P (src)) 423 { 424 /* First, determine if we need 1 TImode load or 2. We need only 1 425 if the bits being extracted do not cross the alignment boundary 426 as determined by the MEM and its address. */ 427 428 align_mask = -MEM_ALIGN (src); 429 if ((start & align_mask) == ((start + width - 1) & align_mask)) 430 { 431 /* Alignment is sufficient for 1 load. */ 432 s0 = gen_reg_rtx (TImode); 433 r0 = spu_expand_load (s0, 0, src, start / 8); 434 start &= 7; 435 if (r0) 436 emit_insn (gen_rotqby_ti (s0, s0, r0)); 437 } 438 else 439 { 440 /* Need 2 loads. 
	   */
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  start &= 7;

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}

    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start. */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}

void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;


  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG.
   */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  addr1 = plus_constant (Pmode, addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}


int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (offset < bytes)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1],
V16QImode, offset); 657 mask = gen_reg_rtx (V16QImode); 658 sreg = gen_reg_rtx (V16QImode); 659 dreg = gen_reg_rtx (V16QImode); 660 target = gen_reg_rtx (V16QImode); 661 emit_move_insn (mask, array_to_constant (V16QImode, arr)); 662 emit_move_insn (dreg, dst); 663 emit_move_insn (sreg, src); 664 emit_insn (gen_selb (target, dreg, sreg, mask)); 665 emit_move_insn (dst, target); 666 } 667 return 1; 668 } 669 return 0; 670} 671 672enum spu_comp_code 673{ SPU_EQ, SPU_GT, SPU_GTU }; 674 675int spu_comp_icode[12][3] = { 676 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, 677 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, 678 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, 679 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, 680 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, 681 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, 682 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0}, 683 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi}, 684 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi}, 685 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si}, 686 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0}, 687 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0}, 688}; 689 690/* Generate a compare for CODE. Return a brand-new rtx that represents 691 the result of the compare. GCC can figure this out too if we don't 692 provide all variations of compares, but GCC always wants to use 693 WORD_MODE, we can generate better code in most cases if we do it 694 ourselves. */ 695void 696spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[]) 697{ 698 int reverse_compare = 0; 699 int reverse_test = 0; 700 rtx compare_result, eq_result; 701 rtx comp_rtx, eq_rtx; 702 machine_mode comp_mode; 703 machine_mode op_mode; 704 enum spu_comp_code scode, eq_code; 705 enum insn_code ior_code; 706 enum rtx_code code = GET_CODE (cmp); 707 rtx op0 = XEXP (cmp, 0); 708 rtx op1 = XEXP (cmp, 1); 709 int index; 710 int eq_test = 0; 711 712 /* When op1 is a CONST_INT change (X >= C) to (X > C-1), 713 and so on, to keep the constant in operand 1. */ 714 if (GET_CODE (op1) == CONST_INT) 715 { 716 HOST_WIDE_INT val = INTVAL (op1) - 1; 717 if (trunc_int_for_mode (val, GET_MODE (op0)) == val) 718 switch (code) 719 { 720 case GE: 721 op1 = GEN_INT (val); 722 code = GT; 723 break; 724 case LT: 725 op1 = GEN_INT (val); 726 code = LE; 727 break; 728 case GEU: 729 op1 = GEN_INT (val); 730 code = GTU; 731 break; 732 case LTU: 733 op1 = GEN_INT (val); 734 code = LEU; 735 break; 736 default: 737 break; 738 } 739 } 740 741 /* However, if we generate an integer result, performing a reverse test 742 would require an extra negation, so avoid that where possible. 
*/ 743 if (GET_CODE (op1) == CONST_INT && is_set == 1) 744 { 745 HOST_WIDE_INT val = INTVAL (op1) + 1; 746 if (trunc_int_for_mode (val, GET_MODE (op0)) == val) 747 switch (code) 748 { 749 case LE: 750 op1 = GEN_INT (val); 751 code = LT; 752 break; 753 case LEU: 754 op1 = GEN_INT (val); 755 code = LTU; 756 break; 757 default: 758 break; 759 } 760 } 761 762 comp_mode = SImode; 763 op_mode = GET_MODE (op0); 764 765 switch (code) 766 { 767 case GE: 768 scode = SPU_GT; 769 if (HONOR_NANS (op_mode)) 770 { 771 reverse_compare = 0; 772 reverse_test = 0; 773 eq_test = 1; 774 eq_code = SPU_EQ; 775 } 776 else 777 { 778 reverse_compare = 1; 779 reverse_test = 1; 780 } 781 break; 782 case LE: 783 scode = SPU_GT; 784 if (HONOR_NANS (op_mode)) 785 { 786 reverse_compare = 1; 787 reverse_test = 0; 788 eq_test = 1; 789 eq_code = SPU_EQ; 790 } 791 else 792 { 793 reverse_compare = 0; 794 reverse_test = 1; 795 } 796 break; 797 case LT: 798 reverse_compare = 1; 799 reverse_test = 0; 800 scode = SPU_GT; 801 break; 802 case GEU: 803 reverse_compare = 1; 804 reverse_test = 1; 805 scode = SPU_GTU; 806 break; 807 case LEU: 808 reverse_compare = 0; 809 reverse_test = 1; 810 scode = SPU_GTU; 811 break; 812 case LTU: 813 reverse_compare = 1; 814 reverse_test = 0; 815 scode = SPU_GTU; 816 break; 817 case NE: 818 reverse_compare = 0; 819 reverse_test = 1; 820 scode = SPU_EQ; 821 break; 822 823 case EQ: 824 scode = SPU_EQ; 825 break; 826 case GT: 827 scode = SPU_GT; 828 break; 829 case GTU: 830 scode = SPU_GTU; 831 break; 832 default: 833 scode = SPU_EQ; 834 break; 835 } 836 837 switch (op_mode) 838 { 839 case QImode: 840 index = 0; 841 comp_mode = QImode; 842 break; 843 case HImode: 844 index = 1; 845 comp_mode = HImode; 846 break; 847 case SImode: 848 index = 2; 849 break; 850 case DImode: 851 index = 3; 852 break; 853 case TImode: 854 index = 4; 855 break; 856 case SFmode: 857 index = 5; 858 break; 859 case DFmode: 860 index = 6; 861 break; 862 case V16QImode: 863 index = 7; 864 comp_mode = op_mode; 865 break; 866 case V8HImode: 867 index = 8; 868 comp_mode = op_mode; 869 break; 870 case V4SImode: 871 index = 9; 872 comp_mode = op_mode; 873 break; 874 case V4SFmode: 875 index = 10; 876 comp_mode = V4SImode; 877 break; 878 case V2DFmode: 879 index = 11; 880 comp_mode = V2DImode; 881 break; 882 case V2DImode: 883 default: 884 abort (); 885 } 886 887 if (GET_MODE (op1) == DFmode 888 && (scode != SPU_GT && scode != SPU_EQ)) 889 abort (); 890 891 if (is_set == 0 && op1 == const0_rtx 892 && (GET_MODE (op0) == SImode 893 || GET_MODE (op0) == HImode 894 || GET_MODE (op0) == QImode) && scode == SPU_EQ) 895 { 896 /* Don't need to set a register with the result when we are 897 comparing against zero and branching. 
*/ 898 reverse_test = !reverse_test; 899 compare_result = op0; 900 } 901 else 902 { 903 compare_result = gen_reg_rtx (comp_mode); 904 905 if (reverse_compare) 906 { 907 rtx t = op1; 908 op1 = op0; 909 op0 = t; 910 } 911 912 if (spu_comp_icode[index][scode] == 0) 913 abort (); 914 915 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate) 916 (op0, op_mode)) 917 op0 = force_reg (op_mode, op0); 918 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate) 919 (op1, op_mode)) 920 op1 = force_reg (op_mode, op1); 921 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result, 922 op0, op1); 923 if (comp_rtx == 0) 924 abort (); 925 emit_insn (comp_rtx); 926 927 if (eq_test) 928 { 929 eq_result = gen_reg_rtx (comp_mode); 930 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result, 931 op0, op1); 932 if (eq_rtx == 0) 933 abort (); 934 emit_insn (eq_rtx); 935 ior_code = optab_handler (ior_optab, comp_mode); 936 gcc_assert (ior_code != CODE_FOR_nothing); 937 emit_insn (GEN_FCN (ior_code) 938 (compare_result, compare_result, eq_result)); 939 } 940 } 941 942 if (is_set == 0) 943 { 944 rtx bcomp; 945 rtx loc_ref; 946 947 /* We don't have branch on QI compare insns, so we convert the 948 QI compare result to a HI result. */ 949 if (comp_mode == QImode) 950 { 951 rtx old_res = compare_result; 952 compare_result = gen_reg_rtx (HImode); 953 comp_mode = HImode; 954 emit_insn (gen_extendqihi2 (compare_result, old_res)); 955 } 956 957 if (reverse_test) 958 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx); 959 else 960 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx); 961 962 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]); 963 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 964 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, 965 loc_ref, pc_rtx))); 966 } 967 else if (is_set == 2) 968 { 969 rtx target = operands[0]; 970 int compare_size = GET_MODE_BITSIZE (comp_mode); 971 int target_size = GET_MODE_BITSIZE (GET_MODE (target)); 972 machine_mode mode = mode_for_size (target_size, MODE_INT, 0); 973 rtx select_mask; 974 rtx op_t = operands[2]; 975 rtx op_f = operands[3]; 976 977 /* The result of the comparison can be SI, HI or QI mode. Create a 978 mask based on that result. 
*/ 979 if (target_size > compare_size) 980 { 981 select_mask = gen_reg_rtx (mode); 982 emit_insn (gen_extend_compare (select_mask, compare_result)); 983 } 984 else if (target_size < compare_size) 985 select_mask = 986 gen_rtx_SUBREG (mode, compare_result, 987 (compare_size - target_size) / BITS_PER_UNIT); 988 else if (comp_mode != mode) 989 select_mask = gen_rtx_SUBREG (mode, compare_result, 0); 990 else 991 select_mask = compare_result; 992 993 if (GET_MODE (target) != GET_MODE (op_t) 994 || GET_MODE (target) != GET_MODE (op_f)) 995 abort (); 996 997 if (reverse_test) 998 emit_insn (gen_selb (target, op_t, op_f, select_mask)); 999 else 1000 emit_insn (gen_selb (target, op_f, op_t, select_mask)); 1001 } 1002 else 1003 { 1004 rtx target = operands[0]; 1005 if (reverse_test) 1006 emit_insn (gen_rtx_SET (VOIDmode, compare_result, 1007 gen_rtx_NOT (comp_mode, compare_result))); 1008 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode) 1009 emit_insn (gen_extendhisi2 (target, compare_result)); 1010 else if (GET_MODE (target) == SImode 1011 && GET_MODE (compare_result) == QImode) 1012 emit_insn (gen_extend_compare (target, compare_result)); 1013 else 1014 emit_move_insn (target, compare_result); 1015 } 1016} 1017 1018HOST_WIDE_INT 1019const_double_to_hwint (rtx x) 1020{ 1021 HOST_WIDE_INT val; 1022 REAL_VALUE_TYPE rv; 1023 if (GET_MODE (x) == SFmode) 1024 { 1025 REAL_VALUE_FROM_CONST_DOUBLE (rv, x); 1026 REAL_VALUE_TO_TARGET_SINGLE (rv, val); 1027 } 1028 else if (GET_MODE (x) == DFmode) 1029 { 1030 long l[2]; 1031 REAL_VALUE_FROM_CONST_DOUBLE (rv, x); 1032 REAL_VALUE_TO_TARGET_DOUBLE (rv, l); 1033 val = l[0]; 1034 val = (val << 32) | (l[1] & 0xffffffff); 1035 } 1036 else 1037 abort (); 1038 return val; 1039} 1040 1041rtx 1042hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v) 1043{ 1044 long tv[2]; 1045 REAL_VALUE_TYPE rv; 1046 gcc_assert (mode == SFmode || mode == DFmode); 1047 1048 if (mode == SFmode) 1049 tv[0] = (v << 32) >> 32; 1050 else if (mode == DFmode) 1051 { 1052 tv[1] = (v << 32) >> 32; 1053 tv[0] = v >> 32; 1054 } 1055 real_from_target (&rv, tv, mode); 1056 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode); 1057} 1058 1059void 1060print_operand_address (FILE * file, register rtx addr) 1061{ 1062 rtx reg; 1063 rtx offset; 1064 1065 if (GET_CODE (addr) == AND 1066 && GET_CODE (XEXP (addr, 1)) == CONST_INT 1067 && INTVAL (XEXP (addr, 1)) == -16) 1068 addr = XEXP (addr, 0); 1069 1070 switch (GET_CODE (addr)) 1071 { 1072 case REG: 1073 fprintf (file, "0(%s)", reg_names[REGNO (addr)]); 1074 break; 1075 1076 case PLUS: 1077 reg = XEXP (addr, 0); 1078 offset = XEXP (addr, 1); 1079 if (GET_CODE (offset) == REG) 1080 { 1081 fprintf (file, "%s,%s", reg_names[REGNO (reg)], 1082 reg_names[REGNO (offset)]); 1083 } 1084 else if (GET_CODE (offset) == CONST_INT) 1085 { 1086 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", 1087 INTVAL (offset), reg_names[REGNO (reg)]); 1088 } 1089 else 1090 abort (); 1091 break; 1092 1093 case CONST: 1094 case LABEL_REF: 1095 case SYMBOL_REF: 1096 case CONST_INT: 1097 output_addr_const (file, addr); 1098 break; 1099 1100 default: 1101 debug_rtx (addr); 1102 abort (); 1103 } 1104} 1105 1106void 1107print_operand (FILE * file, rtx x, int code) 1108{ 1109 machine_mode mode = GET_MODE (x); 1110 HOST_WIDE_INT val; 1111 unsigned char arr[16]; 1112 int xcode = GET_CODE (x); 1113 int i, info; 1114 if (GET_MODE (x) == VOIDmode) 1115 switch (code) 1116 { 1117 case 'L': /* 128 bits, signed */ 1118 case 'm': /* 128 bits, signed */ 1119 case 'T': /* 128 bits, 
signed */ 1120 case 't': /* 128 bits, signed */ 1121 mode = TImode; 1122 break; 1123 case 'K': /* 64 bits, signed */ 1124 case 'k': /* 64 bits, signed */ 1125 case 'D': /* 64 bits, signed */ 1126 case 'd': /* 64 bits, signed */ 1127 mode = DImode; 1128 break; 1129 case 'J': /* 32 bits, signed */ 1130 case 'j': /* 32 bits, signed */ 1131 case 's': /* 32 bits, signed */ 1132 case 'S': /* 32 bits, signed */ 1133 mode = SImode; 1134 break; 1135 } 1136 switch (code) 1137 { 1138 1139 case 'j': /* 32 bits, signed */ 1140 case 'k': /* 64 bits, signed */ 1141 case 'm': /* 128 bits, signed */ 1142 if (xcode == CONST_INT 1143 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR) 1144 { 1145 gcc_assert (logical_immediate_p (x, mode)); 1146 constant_to_array (mode, x, arr); 1147 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1148 val = trunc_int_for_mode (val, SImode); 1149 switch (which_logical_immediate (val)) 1150 { 1151 case SPU_ORI: 1152 break; 1153 case SPU_ORHI: 1154 fprintf (file, "h"); 1155 break; 1156 case SPU_ORBI: 1157 fprintf (file, "b"); 1158 break; 1159 default: 1160 gcc_unreachable(); 1161 } 1162 } 1163 else 1164 gcc_unreachable(); 1165 return; 1166 1167 case 'J': /* 32 bits, signed */ 1168 case 'K': /* 64 bits, signed */ 1169 case 'L': /* 128 bits, signed */ 1170 if (xcode == CONST_INT 1171 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR) 1172 { 1173 gcc_assert (logical_immediate_p (x, mode) 1174 || iohl_immediate_p (x, mode)); 1175 constant_to_array (mode, x, arr); 1176 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1177 val = trunc_int_for_mode (val, SImode); 1178 switch (which_logical_immediate (val)) 1179 { 1180 case SPU_ORI: 1181 case SPU_IOHL: 1182 break; 1183 case SPU_ORHI: 1184 val = trunc_int_for_mode (val, HImode); 1185 break; 1186 case SPU_ORBI: 1187 val = trunc_int_for_mode (val, QImode); 1188 break; 1189 default: 1190 gcc_unreachable(); 1191 } 1192 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); 1193 } 1194 else 1195 gcc_unreachable(); 1196 return; 1197 1198 case 't': /* 128 bits, signed */ 1199 case 'd': /* 64 bits, signed */ 1200 case 's': /* 32 bits, signed */ 1201 if (CONSTANT_P (x)) 1202 { 1203 enum immediate_class c = classify_immediate (x, mode); 1204 switch (c) 1205 { 1206 case IC_IL1: 1207 constant_to_array (mode, x, arr); 1208 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1209 val = trunc_int_for_mode (val, SImode); 1210 switch (which_immediate_load (val)) 1211 { 1212 case SPU_IL: 1213 break; 1214 case SPU_ILA: 1215 fprintf (file, "a"); 1216 break; 1217 case SPU_ILH: 1218 fprintf (file, "h"); 1219 break; 1220 case SPU_ILHU: 1221 fprintf (file, "hu"); 1222 break; 1223 default: 1224 gcc_unreachable (); 1225 } 1226 break; 1227 case IC_CPAT: 1228 constant_to_array (mode, x, arr); 1229 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0); 1230 if (info == 1) 1231 fprintf (file, "b"); 1232 else if (info == 2) 1233 fprintf (file, "h"); 1234 else if (info == 4) 1235 fprintf (file, "w"); 1236 else if (info == 8) 1237 fprintf (file, "d"); 1238 break; 1239 case IC_IL1s: 1240 if (xcode == CONST_VECTOR) 1241 { 1242 x = CONST_VECTOR_ELT (x, 0); 1243 xcode = GET_CODE (x); 1244 } 1245 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST) 1246 fprintf (file, "a"); 1247 else if (xcode == HIGH) 1248 fprintf (file, "hu"); 1249 break; 1250 case IC_FSMBI: 1251 case IC_FSMBI2: 1252 case IC_IL2: 1253 case IC_IL2s: 1254 case IC_POOL: 1255 abort (); 1256 } 1257 } 1258 else 1259 gcc_unreachable (); 1260 return; 1261 1262 case 'T': /* 
128 bits, signed */ 1263 case 'D': /* 64 bits, signed */ 1264 case 'S': /* 32 bits, signed */ 1265 if (CONSTANT_P (x)) 1266 { 1267 enum immediate_class c = classify_immediate (x, mode); 1268 switch (c) 1269 { 1270 case IC_IL1: 1271 constant_to_array (mode, x, arr); 1272 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1273 val = trunc_int_for_mode (val, SImode); 1274 switch (which_immediate_load (val)) 1275 { 1276 case SPU_IL: 1277 case SPU_ILA: 1278 break; 1279 case SPU_ILH: 1280 case SPU_ILHU: 1281 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode); 1282 break; 1283 default: 1284 gcc_unreachable (); 1285 } 1286 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); 1287 break; 1288 case IC_FSMBI: 1289 constant_to_array (mode, x, arr); 1290 val = 0; 1291 for (i = 0; i < 16; i++) 1292 { 1293 val <<= 1; 1294 val |= arr[i] & 1; 1295 } 1296 print_operand (file, GEN_INT (val), 0); 1297 break; 1298 case IC_CPAT: 1299 constant_to_array (mode, x, arr); 1300 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info); 1301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info); 1302 break; 1303 case IC_IL1s: 1304 if (xcode == HIGH) 1305 x = XEXP (x, 0); 1306 if (GET_CODE (x) == CONST_VECTOR) 1307 x = CONST_VECTOR_ELT (x, 0); 1308 output_addr_const (file, x); 1309 if (xcode == HIGH) 1310 fprintf (file, "@h"); 1311 break; 1312 case IC_IL2: 1313 case IC_IL2s: 1314 case IC_FSMBI2: 1315 case IC_POOL: 1316 abort (); 1317 } 1318 } 1319 else 1320 gcc_unreachable (); 1321 return; 1322 1323 case 'C': 1324 if (xcode == CONST_INT) 1325 { 1326 /* Only 4 least significant bits are relevant for generate 1327 control word instructions. */ 1328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15); 1329 return; 1330 } 1331 break; 1332 1333 case 'M': /* print code for c*d */ 1334 if (GET_CODE (x) == CONST_INT) 1335 switch (INTVAL (x)) 1336 { 1337 case 1: 1338 fprintf (file, "b"); 1339 break; 1340 case 2: 1341 fprintf (file, "h"); 1342 break; 1343 case 4: 1344 fprintf (file, "w"); 1345 break; 1346 case 8: 1347 fprintf (file, "d"); 1348 break; 1349 default: 1350 gcc_unreachable(); 1351 } 1352 else 1353 gcc_unreachable(); 1354 return; 1355 1356 case 'N': /* Negate the operand */ 1357 if (xcode == CONST_INT) 1358 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x)); 1359 else if (xcode == CONST_VECTOR) 1360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 1361 -INTVAL (CONST_VECTOR_ELT (x, 0))); 1362 return; 1363 1364 case 'I': /* enable/disable interrupts */ 1365 if (xcode == CONST_INT) 1366 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e"); 1367 return; 1368 1369 case 'b': /* branch modifiers */ 1370 if (xcode == REG) 1371 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : ""); 1372 else if (COMPARISON_P (x)) 1373 fprintf (file, "%s", xcode == NE ? "n" : ""); 1374 return; 1375 1376 case 'i': /* indirect call */ 1377 if (xcode == MEM) 1378 { 1379 if (GET_CODE (XEXP (x, 0)) == REG) 1380 /* Used in indirect function calls. 
*/ 1381 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]); 1382 else 1383 output_address (XEXP (x, 0)); 1384 } 1385 return; 1386 1387 case 'p': /* load/store */ 1388 if (xcode == MEM) 1389 { 1390 x = XEXP (x, 0); 1391 xcode = GET_CODE (x); 1392 } 1393 if (xcode == AND) 1394 { 1395 x = XEXP (x, 0); 1396 xcode = GET_CODE (x); 1397 } 1398 if (xcode == REG) 1399 fprintf (file, "d"); 1400 else if (xcode == CONST_INT) 1401 fprintf (file, "a"); 1402 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF) 1403 fprintf (file, "r"); 1404 else if (xcode == PLUS || xcode == LO_SUM) 1405 { 1406 if (GET_CODE (XEXP (x, 1)) == REG) 1407 fprintf (file, "x"); 1408 else 1409 fprintf (file, "d"); 1410 } 1411 return; 1412 1413 case 'e': 1414 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1415 val &= 0x7; 1416 output_addr_const (file, GEN_INT (val)); 1417 return; 1418 1419 case 'f': 1420 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1421 val &= 0x1f; 1422 output_addr_const (file, GEN_INT (val)); 1423 return; 1424 1425 case 'g': 1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1427 val &= 0x3f; 1428 output_addr_const (file, GEN_INT (val)); 1429 return; 1430 1431 case 'h': 1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1433 val = (val >> 3) & 0x1f; 1434 output_addr_const (file, GEN_INT (val)); 1435 return; 1436 1437 case 'E': 1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1439 val = -val; 1440 val &= 0x7; 1441 output_addr_const (file, GEN_INT (val)); 1442 return; 1443 1444 case 'F': 1445 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1446 val = -val; 1447 val &= 0x1f; 1448 output_addr_const (file, GEN_INT (val)); 1449 return; 1450 1451 case 'G': 1452 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1453 val = -val; 1454 val &= 0x3f; 1455 output_addr_const (file, GEN_INT (val)); 1456 return; 1457 1458 case 'H': 1459 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1460 val = -(val & -8ll); 1461 val = (val >> 3) & 0x1f; 1462 output_addr_const (file, GEN_INT (val)); 1463 return; 1464 1465 case 'v': 1466 case 'w': 1467 constant_to_array (mode, x, arr); 1468 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127; 1469 output_addr_const (file, GEN_INT (code == 'w' ? -val : val)); 1470 return; 1471 1472 case 0: 1473 if (xcode == REG) 1474 fprintf (file, "%s", reg_names[REGNO (x)]); 1475 else if (xcode == MEM) 1476 output_address (XEXP (x, 0)); 1477 else if (xcode == CONST_VECTOR) 1478 print_operand (file, CONST_VECTOR_ELT (x, 0), 0); 1479 else 1480 output_addr_const (file, x); 1481 return; 1482 1483 /* unused letters 1484 o qr u yz 1485 AB OPQR UVWXYZ */ 1486 default: 1487 output_operand_lossage ("invalid %%xn code"); 1488 } 1489 gcc_unreachable (); 1490} 1491 1492/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a 1493 caller saved register. For leaf functions it is more efficient to 1494 use a volatile register because we won't need to save and restore the 1495 pic register. This routine is only valid after register allocation 1496 is completed, so we can pick an unused register. */ 1497static rtx 1498get_pic_reg (void) 1499{ 1500 if (!reload_completed && !reload_in_progress) 1501 abort (); 1502 1503 /* If we've already made the decision, we need to keep with it. 
Once we've 1504 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may 1505 return true since the register is now live; this should not cause us to 1506 "switch back" to using pic_offset_table_rtx. */ 1507 if (!cfun->machine->pic_reg) 1508 { 1509 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM)) 1510 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); 1511 else 1512 cfun->machine->pic_reg = pic_offset_table_rtx; 1513 } 1514 1515 return cfun->machine->pic_reg; 1516} 1517 1518/* Split constant addresses to handle cases that are too large. 1519 Add in the pic register when in PIC mode. 1520 Split immediates that require more than 1 instruction. */ 1521int 1522spu_split_immediate (rtx * ops) 1523{ 1524 machine_mode mode = GET_MODE (ops[0]); 1525 enum immediate_class c = classify_immediate (ops[1], mode); 1526 1527 switch (c) 1528 { 1529 case IC_IL2: 1530 { 1531 unsigned char arrhi[16]; 1532 unsigned char arrlo[16]; 1533 rtx to, temp, hi, lo; 1534 int i; 1535 machine_mode imode = mode; 1536 /* We need to do reals as ints because the constant used in the 1537 IOR might not be a legitimate real constant. */ 1538 imode = int_mode_for_mode (mode); 1539 constant_to_array (mode, ops[1], arrhi); 1540 if (imode != mode) 1541 to = simplify_gen_subreg (imode, ops[0], mode, 0); 1542 else 1543 to = ops[0]; 1544 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode); 1545 for (i = 0; i < 16; i += 4) 1546 { 1547 arrlo[i + 2] = arrhi[i + 2]; 1548 arrlo[i + 3] = arrhi[i + 3]; 1549 arrlo[i + 0] = arrlo[i + 1] = 0; 1550 arrhi[i + 2] = arrhi[i + 3] = 0; 1551 } 1552 hi = array_to_constant (imode, arrhi); 1553 lo = array_to_constant (imode, arrlo); 1554 emit_move_insn (temp, hi); 1555 emit_insn (gen_rtx_SET 1556 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo))); 1557 return 1; 1558 } 1559 case IC_FSMBI2: 1560 { 1561 unsigned char arr_fsmbi[16]; 1562 unsigned char arr_andbi[16]; 1563 rtx to, reg_fsmbi, reg_and; 1564 int i; 1565 machine_mode imode = mode; 1566 /* We need to do reals as ints because the constant used in the 1567 * AND might not be a legitimate real constant. 
*/ 1568 imode = int_mode_for_mode (mode); 1569 constant_to_array (mode, ops[1], arr_fsmbi); 1570 if (imode != mode) 1571 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0); 1572 else 1573 to = ops[0]; 1574 for (i = 0; i < 16; i++) 1575 if (arr_fsmbi[i] != 0) 1576 { 1577 arr_andbi[0] = arr_fsmbi[i]; 1578 arr_fsmbi[i] = 0xff; 1579 } 1580 for (i = 1; i < 16; i++) 1581 arr_andbi[i] = arr_andbi[0]; 1582 reg_fsmbi = array_to_constant (imode, arr_fsmbi); 1583 reg_and = array_to_constant (imode, arr_andbi); 1584 emit_move_insn (to, reg_fsmbi); 1585 emit_insn (gen_rtx_SET 1586 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and))); 1587 return 1; 1588 } 1589 case IC_POOL: 1590 if (reload_in_progress || reload_completed) 1591 { 1592 rtx mem = force_const_mem (mode, ops[1]); 1593 if (TARGET_LARGE_MEM) 1594 { 1595 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0])); 1596 emit_move_insn (addr, XEXP (mem, 0)); 1597 mem = replace_equiv_address (mem, addr); 1598 } 1599 emit_move_insn (ops[0], mem); 1600 return 1; 1601 } 1602 break; 1603 case IC_IL1s: 1604 case IC_IL2s: 1605 if (reload_completed && GET_CODE (ops[1]) != HIGH) 1606 { 1607 if (c == IC_IL2s) 1608 { 1609 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1])); 1610 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1])); 1611 } 1612 else if (flag_pic) 1613 emit_insn (gen_pic (ops[0], ops[1])); 1614 if (flag_pic) 1615 { 1616 rtx pic_reg = get_pic_reg (); 1617 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg)); 1618 } 1619 return flag_pic || c == IC_IL2s; 1620 } 1621 break; 1622 case IC_IL1: 1623 case IC_FSMBI: 1624 case IC_CPAT: 1625 break; 1626 } 1627 return 0; 1628} 1629 1630/* SAVING is TRUE when we are generating the actual load and store 1631 instructions for REGNO. When determining the size of the stack 1632 needed for saving register we must allocate enough space for the 1633 worst case, because we don't always have the information early enough 1634 to not allocate it. But we can at least eliminate the actual loads 1635 and stores during the prologue/epilogue. */ 1636static int 1637need_to_save_reg (int regno, int saving) 1638{ 1639 if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) 1640 return 1; 1641 if (flag_pic 1642 && regno == PIC_OFFSET_TABLE_REGNUM 1643 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx)) 1644 return 1; 1645 return 0; 1646} 1647 1648/* This function is only correct starting with local register 1649 allocation */ 1650int 1651spu_saved_regs_size (void) 1652{ 1653 int reg_save_size = 0; 1654 int regno; 1655 1656 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno) 1657 if (need_to_save_reg (regno, 0)) 1658 reg_save_size += 0x10; 1659 return reg_save_size; 1660} 1661 1662static rtx_insn * 1663frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset) 1664{ 1665 rtx reg = gen_rtx_REG (V4SImode, regno); 1666 rtx mem = 1667 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset))); 1668 return emit_insn (gen_movv4si (mem, reg)); 1669} 1670 1671static rtx_insn * 1672frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset) 1673{ 1674 rtx reg = gen_rtx_REG (V4SImode, regno); 1675 rtx mem = 1676 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset))); 1677 return emit_insn (gen_movv4si (reg, mem)); 1678} 1679 1680/* This happens after reload, so we need to expand it. 
*/ 1681static rtx_insn * 1682frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch) 1683{ 1684 rtx_insn *insn; 1685 if (satisfies_constraint_K (GEN_INT (imm))) 1686 { 1687 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm))); 1688 } 1689 else 1690 { 1691 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode))); 1692 insn = emit_insn (gen_addsi3 (dst, src, scratch)); 1693 if (REGNO (src) == REGNO (scratch)) 1694 abort (); 1695 } 1696 return insn; 1697} 1698 1699/* Return nonzero if this function is known to have a null epilogue. */ 1700 1701int 1702direct_return (void) 1703{ 1704 if (reload_completed) 1705 { 1706 if (cfun->static_chain_decl == 0 1707 && (spu_saved_regs_size () 1708 + get_frame_size () 1709 + crtl->outgoing_args_size 1710 + crtl->args.pretend_args_size == 0) 1711 && crtl->is_leaf) 1712 return 1; 1713 } 1714 return 0; 1715} 1716 1717/* 1718 The stack frame looks like this: 1719 +-------------+ 1720 | incoming | 1721 | args | 1722 AP -> +-------------+ 1723 | $lr save | 1724 +-------------+ 1725 prev SP | back chain | 1726 +-------------+ 1727 | var args | 1728 | reg save | crtl->args.pretend_args_size bytes 1729 +-------------+ 1730 | ... | 1731 | saved regs | spu_saved_regs_size() bytes 1732 FP -> +-------------+ 1733 | ... | 1734 | vars | get_frame_size() bytes 1735 HFP -> +-------------+ 1736 | ... | 1737 | outgoing | 1738 | args | crtl->outgoing_args_size bytes 1739 +-------------+ 1740 | $lr of next | 1741 | frame | 1742 +-------------+ 1743 | back chain | 1744 SP -> +-------------+ 1745 1746*/ 1747void 1748spu_expand_prologue (void) 1749{ 1750 HOST_WIDE_INT size = get_frame_size (), offset, regno; 1751 HOST_WIDE_INT total_size; 1752 HOST_WIDE_INT saved_regs_size; 1753 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 1754 rtx scratch_reg_0, scratch_reg_1; 1755 rtx_insn *insn; 1756 rtx real; 1757 1758 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg) 1759 cfun->machine->pic_reg = pic_offset_table_rtx; 1760 1761 if (spu_naked_function_p (current_function_decl)) 1762 return; 1763 1764 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1); 1765 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2); 1766 1767 saved_regs_size = spu_saved_regs_size (); 1768 total_size = size + saved_regs_size 1769 + crtl->outgoing_args_size 1770 + crtl->args.pretend_args_size; 1771 1772 if (!crtl->is_leaf 1773 || cfun->calls_alloca || total_size > 0) 1774 total_size += STACK_POINTER_OFFSET; 1775 1776 /* Save this first because code after this might use the link 1777 register as a scratch register. 
*/ 1778 if (!crtl->is_leaf) 1779 { 1780 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16); 1781 RTX_FRAME_RELATED_P (insn) = 1; 1782 } 1783 1784 if (total_size > 0) 1785 { 1786 offset = -crtl->args.pretend_args_size; 1787 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) 1788 if (need_to_save_reg (regno, 1)) 1789 { 1790 offset -= 16; 1791 insn = frame_emit_store (regno, sp_reg, offset); 1792 RTX_FRAME_RELATED_P (insn) = 1; 1793 } 1794 } 1795 1796 if (flag_pic && cfun->machine->pic_reg) 1797 { 1798 rtx pic_reg = cfun->machine->pic_reg; 1799 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0)); 1800 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0)); 1801 } 1802 1803 if (total_size > 0) 1804 { 1805 if (flag_stack_check) 1806 { 1807 /* We compare against total_size-1 because 1808 ($sp >= total_size) <=> ($sp > total_size-1) */ 1809 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0)); 1810 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); 1811 rtx size_v4si = spu_const (V4SImode, total_size - 1); 1812 if (!satisfies_constraint_K (GEN_INT (total_size - 1))) 1813 { 1814 emit_move_insn (scratch_v4si, size_v4si); 1815 size_v4si = scratch_v4si; 1816 } 1817 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si)); 1818 emit_insn (gen_vec_extractv4si 1819 (scratch_reg_0, scratch_v4si, GEN_INT (1))); 1820 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0))); 1821 } 1822 1823 /* Adjust the stack pointer, and make sure scratch_reg_0 contains 1824 the value of the previous $sp because we save it as the back 1825 chain. */ 1826 if (total_size <= 2000) 1827 { 1828 /* In this case we save the back chain first. */ 1829 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size); 1830 insn = 1831 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0); 1832 } 1833 else 1834 { 1835 insn = emit_move_insn (scratch_reg_0, sp_reg); 1836 insn = 1837 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1); 1838 } 1839 RTX_FRAME_RELATED_P (insn) = 1; 1840 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)); 1841 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real); 1842 1843 if (total_size > 2000) 1844 { 1845 /* Save the back chain ptr */ 1846 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0); 1847 } 1848 1849 if (frame_pointer_needed) 1850 { 1851 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); 1852 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET 1853 + crtl->outgoing_args_size; 1854 /* Set the new frame_pointer */ 1855 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0); 1856 RTX_FRAME_RELATED_P (insn) = 1; 1857 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset)); 1858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real); 1859 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY; 1860 } 1861 } 1862 1863 if (flag_stack_usage_info) 1864 current_function_static_stack_size = total_size; 1865} 1866 1867void 1868spu_expand_epilogue (bool sibcall_p) 1869{ 1870 int size = get_frame_size (), offset, regno; 1871 HOST_WIDE_INT saved_regs_size, total_size; 1872 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 1873 rtx scratch_reg_0; 1874 1875 if (spu_naked_function_p (current_function_decl)) 1876 return; 1877 1878 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1); 1879 1880 saved_regs_size = spu_saved_regs_size (); 1881 total_size = size + saved_regs_size 1882 + crtl->outgoing_args_size 1883 + crtl->args.pretend_args_size; 1884 1885 if (!crtl->is_leaf 1886 || 
cfun->calls_alloca || total_size > 0) 1887 total_size += STACK_POINTER_OFFSET; 1888 1889 if (total_size > 0) 1890 { 1891 if (cfun->calls_alloca) 1892 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0); 1893 else 1894 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0); 1895 1896 1897 if (saved_regs_size > 0) 1898 { 1899 offset = -crtl->args.pretend_args_size; 1900 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) 1901 if (need_to_save_reg (regno, 1)) 1902 { 1903 offset -= 0x10; 1904 frame_emit_load (regno, sp_reg, offset); 1905 } 1906 } 1907 } 1908 1909 if (!crtl->is_leaf) 1910 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16); 1911 1912 if (!sibcall_p) 1913 { 1914 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)); 1915 emit_jump_insn (gen__return ()); 1916 } 1917} 1918 1919rtx 1920spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) 1921{ 1922 if (count != 0) 1923 return 0; 1924 /* This is inefficient because it ends up copying to a save-register 1925 which then gets saved even though $lr has already been saved. But 1926 it does generate better code for leaf functions and we don't need 1927 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only 1928 used for __builtin_return_address anyway, so maybe we don't care if 1929 it's inefficient. */ 1930 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM); 1931} 1932 1933 1934/* Given VAL, generate a constant appropriate for MODE. 1935 If MODE is a vector mode, every element will be VAL. 1936 For TImode, VAL will be zero extended to 128 bits. */ 1937rtx 1938spu_const (machine_mode mode, HOST_WIDE_INT val) 1939{ 1940 rtx inner; 1941 rtvec v; 1942 int units, i; 1943 1944 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT 1945 || GET_MODE_CLASS (mode) == MODE_FLOAT 1946 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT 1947 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT); 1948 1949 if (GET_MODE_CLASS (mode) == MODE_INT) 1950 return immed_double_const (val, 0, mode); 1951 1952 /* val is the bit representation of the float */ 1953 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 1954 return hwint_to_const_double (mode, val); 1955 1956 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 1957 inner = immed_double_const (val, 0, GET_MODE_INNER (mode)); 1958 else 1959 inner = hwint_to_const_double (GET_MODE_INNER (mode), val); 1960 1961 units = GET_MODE_NUNITS (mode); 1962 1963 v = rtvec_alloc (units); 1964 1965 for (i = 0; i < units; ++i) 1966 RTVEC_ELT (v, i) = inner; 1967 1968 return gen_rtx_CONST_VECTOR (mode, v); 1969} 1970 1971/* Create a MODE vector constant from 4 ints. */ 1972rtx 1973spu_const_from_ints(machine_mode mode, int a, int b, int c, int d) 1974{ 1975 unsigned char arr[16]; 1976 arr[0] = (a >> 24) & 0xff; 1977 arr[1] = (a >> 16) & 0xff; 1978 arr[2] = (a >> 8) & 0xff; 1979 arr[3] = (a >> 0) & 0xff; 1980 arr[4] = (b >> 24) & 0xff; 1981 arr[5] = (b >> 16) & 0xff; 1982 arr[6] = (b >> 8) & 0xff; 1983 arr[7] = (b >> 0) & 0xff; 1984 arr[8] = (c >> 24) & 0xff; 1985 arr[9] = (c >> 16) & 0xff; 1986 arr[10] = (c >> 8) & 0xff; 1987 arr[11] = (c >> 0) & 0xff; 1988 arr[12] = (d >> 24) & 0xff; 1989 arr[13] = (d >> 16) & 0xff; 1990 arr[14] = (d >> 8) & 0xff; 1991 arr[15] = (d >> 0) & 0xff; 1992 return array_to_constant(mode, arr); 1993} 1994 1995/* branch hint stuff */ 1996 1997/* An array of these is used to propagate hints to predecessor blocks. */ 1998struct spu_bb_info 1999{ 2000 rtx_insn *prop_jump; /* propagated from another block */ 2001 int bb_index; /* the original block. 
*/ 2002}; 2003static struct spu_bb_info *spu_bb_info; 2004 2005#define STOP_HINT_P(INSN) \ 2006 (CALL_P(INSN) \ 2007 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \ 2008 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4) 2009 2010/* 1 when RTX is a hinted branch or its target. We keep track of 2011 what has been hinted so the safe-hint code can test it easily. */ 2012#define HINTED_P(RTX) \ 2013 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging) 2014 2015/* 1 when RTX is an insn that must be scheduled on an even boundary. */ 2016#define SCHED_ON_EVEN_P(RTX) \ 2017 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct) 2018 2019/* Emit a nop for INSN such that the two will dual issue. This assumes 2020 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop. 2021 We check for TImode to handle a MULTI1 insn which has dual issued its 2022 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */ 2023static void 2024emit_nop_for_insn (rtx_insn *insn) 2025{ 2026 int p; 2027 rtx_insn *new_insn; 2028 2029 /* We need to handle JUMP_TABLE_DATA separately. */ 2030 if (JUMP_TABLE_DATA_P (insn)) 2031 { 2032 new_insn = emit_insn_after (gen_lnop(), insn); 2033 recog_memoized (new_insn); 2034 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION; 2035 return; 2036 } 2037 2038 p = get_pipe (insn); 2039 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn)) 2040 new_insn = emit_insn_after (gen_lnop (), insn); 2041 else if (p == 1 && GET_MODE (insn) == TImode) 2042 { 2043 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn); 2044 PUT_MODE (new_insn, TImode); 2045 PUT_MODE (insn, VOIDmode); 2046 } 2047 else 2048 new_insn = emit_insn_after (gen_lnop (), insn); 2049 recog_memoized (new_insn); 2050 INSN_LOCATION (new_insn) = INSN_LOCATION (insn); 2051} 2052 2053/* Insert nops in basic blocks to meet dual issue alignment 2054 requirements. Also make sure hbrp and hint instructions are at least 2055 one cycle apart, possibly inserting a nop. */ 2056static void 2057pad_bb(void) 2058{ 2059 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0; 2060 int length; 2061 int addr; 2062 2063 /* This sets up INSN_ADDRESSES. */ 2064 shorten_branches (get_insns ()); 2065 2066 /* Keep track of length added by nops. 
*/ 2067 length = 0; 2068 2069 prev_insn = 0; 2070 insn = get_insns (); 2071 if (!active_insn_p (insn)) 2072 insn = next_active_insn (insn); 2073 for (; insn; insn = next_insn) 2074 { 2075 next_insn = next_active_insn (insn); 2076 if (INSN_CODE (insn) == CODE_FOR_iprefetch 2077 || INSN_CODE (insn) == CODE_FOR_hbr) 2078 { 2079 if (hbr_insn) 2080 { 2081 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn)); 2082 int a1 = INSN_ADDRESSES (INSN_UID (insn)); 2083 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode) 2084 || (a1 - a0 == 4)) 2085 { 2086 prev_insn = emit_insn_before (gen_lnop (), insn); 2087 PUT_MODE (prev_insn, GET_MODE (insn)); 2088 PUT_MODE (insn, TImode); 2089 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn); 2090 length += 4; 2091 } 2092 } 2093 hbr_insn = insn; 2094 } 2095 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn) 2096 { 2097 if (GET_MODE (insn) == TImode) 2098 PUT_MODE (next_insn, TImode); 2099 insn = next_insn; 2100 next_insn = next_active_insn (insn); 2101 } 2102 addr = INSN_ADDRESSES (INSN_UID (insn)); 2103 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn)) 2104 { 2105 if (((addr + length) & 7) != 0) 2106 { 2107 emit_nop_for_insn (prev_insn); 2108 length += 4; 2109 } 2110 } 2111 else if (GET_MODE (insn) == TImode 2112 && ((next_insn && GET_MODE (next_insn) != TImode) 2113 || get_attr_type (insn) == TYPE_MULTI0) 2114 && ((addr + length) & 7) != 0) 2115 { 2116 /* prev_insn will always be set because the first insn is 2117 always 8-byte aligned. */ 2118 emit_nop_for_insn (prev_insn); 2119 length += 4; 2120 } 2121 prev_insn = insn; 2122 } 2123} 2124 2125 2126/* Routines for branch hints. */ 2127 2128static void 2129spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target, 2130 int distance, sbitmap blocks) 2131{ 2132 rtx branch_label = 0; 2133 rtx_insn *hint; 2134 rtx_insn *insn; 2135 rtx_jump_table_data *table; 2136 2137 if (before == 0 || branch == 0 || target == 0) 2138 return; 2139 2140 /* While scheduling we require hints to be no further than 600, so 2141 we need to enforce that here too */ 2142 if (distance > 600) 2143 return; 2144 2145 /* If we have a Basic block note, emit it after the basic block note. 
*/
  if (NOTE_INSN_BASIC_BLOCK_P (before))
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  INSN_LOCATION (hint) = INSN_LOCATION (branch);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
        vec = XVEC (PATTERN (table), 0);
      else
        vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
        HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit.  */
      insn = emit_insn_before (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops.  */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
        {
          insn =
            emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
          recog_memoized (insn);
          INSN_LOCATION (insn) = INSN_LOCATION (hint);
        }

      /* Make sure any nops inserted aren't scheduled before the hint.  */
      insn = emit_insn_after (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);

      /* Make sure any nops inserted aren't scheduled after the call.  */
      if (CALL_P (branch) && distance < 8 * 4)
        {
          insn = emit_insn_before (gen_blockage (), branch);
          recog_memoized (insn);
          INSN_LOCATION (insn) = INSN_LOCATION (branch);
        }
    }
}

/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target.  */
static rtx
get_branch_target (rtx_insn *branch)
{
  if (JUMP_P (branch))
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
        return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* ASM GOTOs.  */
      if (extract_asm_operands (PATTERN (branch)) != NULL)
        return NULL;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
        abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
        {
          rtx lab = 0;
          rtx note = find_reg_note (branch, REG_BR_PROB, 0);
          if (note)
            {
              /* If the more probable case is not a fall through, then
                 try a branch hint.
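                 For example, with REG_BR_PROB_BASE being 10000, a note
                 value of 8000 exceeds the 6/10 threshold below, so the
                 label in the first arm is hinted (when that arm is not
                 the fall-through pc); a value of 3000 falls under 4/10
                 and the second arm is considered instead.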
*/ 2241 int prob = XINT (note, 0); 2242 if (prob > (REG_BR_PROB_BASE * 6 / 10) 2243 && GET_CODE (XEXP (src, 1)) != PC) 2244 lab = XEXP (src, 1); 2245 else if (prob < (REG_BR_PROB_BASE * 4 / 10) 2246 && GET_CODE (XEXP (src, 2)) != PC) 2247 lab = XEXP (src, 2); 2248 } 2249 if (lab) 2250 { 2251 if (GET_CODE (lab) == RETURN) 2252 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM); 2253 return lab; 2254 } 2255 return 0; 2256 } 2257 2258 return src; 2259 } 2260 else if (CALL_P (branch)) 2261 { 2262 rtx call; 2263 /* All of our call patterns are in a PARALLEL and the CALL is 2264 the first pattern in the PARALLEL. */ 2265 if (GET_CODE (PATTERN (branch)) != PARALLEL) 2266 abort (); 2267 call = XVECEXP (PATTERN (branch), 0, 0); 2268 if (GET_CODE (call) == SET) 2269 call = SET_SRC (call); 2270 if (GET_CODE (call) != CALL) 2271 abort (); 2272 return XEXP (XEXP (call, 0), 0); 2273 } 2274 return 0; 2275} 2276 2277/* The special $hbr register is used to prevent the insn scheduler from 2278 moving hbr insns across instructions which invalidate them. It 2279 should only be used in a clobber, and this function searches for 2280 insns which clobber it. */ 2281static bool 2282insn_clobbers_hbr (rtx_insn *insn) 2283{ 2284 if (INSN_P (insn) 2285 && GET_CODE (PATTERN (insn)) == PARALLEL) 2286 { 2287 rtx parallel = PATTERN (insn); 2288 rtx clobber; 2289 int j; 2290 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--) 2291 { 2292 clobber = XVECEXP (parallel, 0, j); 2293 if (GET_CODE (clobber) == CLOBBER 2294 && GET_CODE (XEXP (clobber, 0)) == REG 2295 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM) 2296 return 1; 2297 } 2298 } 2299 return 0; 2300} 2301 2302/* Search up to 32 insns starting at FIRST: 2303 - at any kind of hinted branch, just return 2304 - at any unconditional branch in the first 15 insns, just return 2305 - at a call or indirect branch, after the first 15 insns, force it to 2306 an even address and return 2307 - at any unconditional branch, after the first 15 insns, force it to 2308 an even address. 2309 At then end of the search, insert an hbrp within 4 insns of FIRST, 2310 and an hbrp within 16 instructions of FIRST. 2311 */ 2312static void 2313insert_hbrp_for_ilb_runout (rtx_insn *first) 2314{ 2315 rtx_insn *insn, *before_4 = 0, *before_16 = 0; 2316 int addr = 0, length, first_addr = -1; 2317 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4; 2318 int insert_lnop_after = 0; 2319 for (insn = first; insn; insn = NEXT_INSN (insn)) 2320 if (INSN_P (insn)) 2321 { 2322 if (first_addr == -1) 2323 first_addr = INSN_ADDRESSES (INSN_UID (insn)); 2324 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr; 2325 length = get_attr_length (insn); 2326 2327 if (before_4 == 0 && addr + length >= 4 * 4) 2328 before_4 = insn; 2329 /* We test for 14 instructions because the first hbrp will add 2330 up to 2 instructions. */ 2331 if (before_16 == 0 && addr + length >= 14 * 4) 2332 before_16 = insn; 2333 2334 if (INSN_CODE (insn) == CODE_FOR_hbr) 2335 { 2336 /* Make sure an hbrp is at least 2 cycles away from a hint. 2337 Insert an lnop after the hbrp when necessary. 
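             For example, if an existing hint sits within the first four
             instruction slots, bit 0 of insert_lnop_after is set so that
             the first hbrp emitted further down is followed by an lnop;
             bit 1 requests the same treatment for the second hbrp.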
*/
          if (before_4 == 0 && addr > 0)
            {
              before_4 = insn;
              insert_lnop_after |= 1;
            }
          else if (before_4 && addr <= 4 * 4)
            insert_lnop_after |= 1;
          if (before_16 == 0 && addr > 10 * 4)
            {
              before_16 = insn;
              insert_lnop_after |= 2;
            }
          else if (before_16 && addr <= 14 * 4)
            insert_lnop_after |= 2;
        }

      if (INSN_CODE (insn) == CODE_FOR_iprefetch)
        {
          if (addr < hbrp_addr0)
            hbrp_addr0 = addr;
          else if (addr < hbrp_addr1)
            hbrp_addr1 = addr;
        }

      if (CALL_P (insn) || JUMP_P (insn))
        {
          if (HINTED_P (insn))
            return;

          /* Any branch after the first 15 insns should be on an even
             address to avoid a special case branch.  There might be
             some nops and/or hbrps inserted, so we test after 10
             insns.  */
          if (addr > 10 * 4)
            SCHED_ON_EVEN_P (insn) = 1;
        }

      if (CALL_P (insn) || tablejump_p (insn, 0, 0))
        return;


      if (addr + length >= 32 * 4)
        {
          gcc_assert (before_4 && before_16);
          if (hbrp_addr0 > 4 * 4)
            {
              insn =
                emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
              recog_memoized (insn);
              INSN_LOCATION (insn) = INSN_LOCATION (before_4);
              INSN_ADDRESSES_NEW (insn,
                                  INSN_ADDRESSES (INSN_UID (before_4)));
              PUT_MODE (insn, GET_MODE (before_4));
              PUT_MODE (before_4, TImode);
              if (insert_lnop_after & 1)
                {
                  insn = emit_insn_before (gen_lnop (), before_4);
                  recog_memoized (insn);
                  INSN_LOCATION (insn) = INSN_LOCATION (before_4);
                  INSN_ADDRESSES_NEW (insn,
                                      INSN_ADDRESSES (INSN_UID (before_4)));
                  PUT_MODE (insn, TImode);
                }
            }
          if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
              && hbrp_addr1 > 16 * 4)
            {
              insn =
                emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
              recog_memoized (insn);
              INSN_LOCATION (insn) = INSN_LOCATION (before_16);
              INSN_ADDRESSES_NEW (insn,
                                  INSN_ADDRESSES (INSN_UID (before_16)));
              PUT_MODE (insn, GET_MODE (before_16));
              PUT_MODE (before_16, TImode);
              if (insert_lnop_after & 2)
                {
                  insn = emit_insn_before (gen_lnop (), before_16);
                  recog_memoized (insn);
                  INSN_LOCATION (insn) = INSN_LOCATION (before_16);
                  INSN_ADDRESSES_NEW (insn,
                                      INSN_ADDRESSES (INSN_UID (before_16)));
                  PUT_MODE (insn, TImode);
                }
            }
          return;
        }
    }
  else if (BARRIER_P (insn))
    return;

}

/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
   first should be within 6 insns of the branch target.  The second
   should be within 22 insns of the branch target.  When determining
   if hbrps are necessary, we look for only 32 inline instructions,
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
   when inserting new hbrps, we insert them within 4 and 16 insns of
   the target.  */
static void
insert_hbrp (void)
{
  rtx_insn *insn;
  if (TARGET_SAFE_HINTS)
    {
      shorten_branches (get_insns ());
      /* Insert hbrp at beginning of function.  */
      insn = next_active_insn (get_insns ());
      if (insn)
        insert_hbrp_for_ilb_runout (insn);
      /* Insert hbrp after hinted targets.
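         That is, after every code label marked HINTED_P and after every
         call, since execution resumes at a hinted address in both cases
         and could otherwise run into the 48-insn hazard described above.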
*/ 2454 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 2455 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn)) 2456 insert_hbrp_for_ilb_runout (next_active_insn (insn)); 2457 } 2458} 2459 2460static int in_spu_reorg; 2461 2462static void 2463spu_var_tracking (void) 2464{ 2465 if (flag_var_tracking) 2466 { 2467 df_analyze (); 2468 timevar_push (TV_VAR_TRACKING); 2469 variable_tracking_main (); 2470 timevar_pop (TV_VAR_TRACKING); 2471 df_finish_pass (false); 2472 } 2473} 2474 2475/* Insert branch hints. There are no branch optimizations after this 2476 pass, so it's safe to set our branch hints now. */ 2477static void 2478spu_machine_dependent_reorg (void) 2479{ 2480 sbitmap blocks; 2481 basic_block bb; 2482 rtx_insn *branch, *insn; 2483 rtx branch_target = 0; 2484 int branch_addr = 0, insn_addr, required_dist = 0; 2485 int i; 2486 unsigned int j; 2487 2488 if (!TARGET_BRANCH_HINTS || optimize == 0) 2489 { 2490 /* We still do it for unoptimized code because an external 2491 function might have hinted a call or return. */ 2492 compute_bb_for_insn (); 2493 insert_hbrp (); 2494 pad_bb (); 2495 spu_var_tracking (); 2496 free_bb_for_insn (); 2497 return; 2498 } 2499 2500 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun)); 2501 bitmap_clear (blocks); 2502 2503 in_spu_reorg = 1; 2504 compute_bb_for_insn (); 2505 2506 /* (Re-)discover loops so that bb->loop_father can be used 2507 in the analysis below. */ 2508 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 2509 2510 compact_blocks (); 2511 2512 spu_bb_info = 2513 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun), 2514 sizeof (struct spu_bb_info)); 2515 2516 /* We need exact insn addresses and lengths. */ 2517 shorten_branches (get_insns ()); 2518 2519 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--) 2520 { 2521 bb = BASIC_BLOCK_FOR_FN (cfun, i); 2522 branch = 0; 2523 if (spu_bb_info[i].prop_jump) 2524 { 2525 branch = spu_bb_info[i].prop_jump; 2526 branch_target = get_branch_target (branch); 2527 branch_addr = INSN_ADDRESSES (INSN_UID (branch)); 2528 required_dist = spu_hint_dist; 2529 } 2530 /* Search from end of a block to beginning. In this loop, find 2531 jumps which need a branch and emit them only when: 2532 - it's an indirect branch and we're at the insn which sets 2533 the register 2534 - we're at an insn that will invalidate the hint. e.g., a 2535 call, another hint insn, inline asm that clobbers $hbr, and 2536 some inlined operations (divmodsi4). Don't consider jumps 2537 because they are only at the end of a block and are 2538 considered when we are deciding whether to propagate 2539 - we're getting too far away from the branch. The hbr insns 2540 only have a signed 10 bit offset 2541 We go back as far as possible so the branch will be considered 2542 for propagation when we get to the beginning of the block. 
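     A hypothetical example of the distance limit: if the branch sits at
     address 0x620 and the scan has walked back to an insn at 0x240, the
     separation is 992 bytes, well beyond the roughly 600-byte reach we
     allow for the hbr offset, so the hint is emitted at the last
     acceptable position and the search for this branch stops.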
*/ 2543 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn)) 2544 { 2545 if (INSN_P (insn)) 2546 { 2547 insn_addr = INSN_ADDRESSES (INSN_UID (insn)); 2548 if (branch 2549 && ((GET_CODE (branch_target) == REG 2550 && set_of (branch_target, insn) != NULL_RTX) 2551 || insn_clobbers_hbr (insn) 2552 || branch_addr - insn_addr > 600)) 2553 { 2554 rtx_insn *next = NEXT_INSN (insn); 2555 int next_addr = INSN_ADDRESSES (INSN_UID (next)); 2556 if (insn != BB_END (bb) 2557 && branch_addr - next_addr >= required_dist) 2558 { 2559 if (dump_file) 2560 fprintf (dump_file, 2561 "hint for %i in block %i before %i\n", 2562 INSN_UID (branch), bb->index, 2563 INSN_UID (next)); 2564 spu_emit_branch_hint (next, branch, branch_target, 2565 branch_addr - next_addr, blocks); 2566 } 2567 branch = 0; 2568 } 2569 2570 /* JUMP_P will only be true at the end of a block. When 2571 branch is already set it means we've previously decided 2572 to propagate a hint for that branch into this block. */ 2573 if (CALL_P (insn) || (JUMP_P (insn) && !branch)) 2574 { 2575 branch = 0; 2576 if ((branch_target = get_branch_target (insn))) 2577 { 2578 branch = insn; 2579 branch_addr = insn_addr; 2580 required_dist = spu_hint_dist; 2581 } 2582 } 2583 } 2584 if (insn == BB_HEAD (bb)) 2585 break; 2586 } 2587 2588 if (branch) 2589 { 2590 /* If we haven't emitted a hint for this branch yet, it might 2591 be profitable to emit it in one of the predecessor blocks, 2592 especially for loops. */ 2593 rtx_insn *bbend; 2594 basic_block prev = 0, prop = 0, prev2 = 0; 2595 int loop_exit = 0, simple_loop = 0; 2596 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn))); 2597 2598 for (j = 0; j < EDGE_COUNT (bb->preds); j++) 2599 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU) 2600 prev = EDGE_PRED (bb, j)->src; 2601 else 2602 prev2 = EDGE_PRED (bb, j)->src; 2603 2604 for (j = 0; j < EDGE_COUNT (bb->succs); j++) 2605 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT) 2606 loop_exit = 1; 2607 else if (EDGE_SUCC (bb, j)->dest == bb) 2608 simple_loop = 1; 2609 2610 /* If this branch is a loop exit then propagate to previous 2611 fallthru block. This catches the cases when it is a simple 2612 loop or when there is an initial branch into the loop. */ 2613 if (prev && (loop_exit || simple_loop) 2614 && bb_loop_depth (prev) <= bb_loop_depth (bb)) 2615 prop = prev; 2616 2617 /* If there is only one adjacent predecessor. Don't propagate 2618 outside this loop. */ 2619 else if (prev && single_pred_p (bb) 2620 && prev->loop_father == bb->loop_father) 2621 prop = prev; 2622 2623 /* If this is the JOIN block of a simple IF-THEN then 2624 propagate the hint to the HEADER block. */ 2625 else if (prev && prev2 2626 && EDGE_COUNT (bb->preds) == 2 2627 && EDGE_COUNT (prev->preds) == 1 2628 && EDGE_PRED (prev, 0)->src == prev2 2629 && prev2->loop_father == bb->loop_father 2630 && GET_CODE (branch_target) != REG) 2631 prop = prev; 2632 2633 /* Don't propagate when: 2634 - this is a simple loop and the hint would be too far 2635 - this is not a simple loop and there are 16 insns in 2636 this block already 2637 - the predecessor block ends in a branch that will be 2638 hinted 2639 - the predecessor block ends in an insn that invalidates 2640 the hint */ 2641 if (prop 2642 && prop->index >= 0 2643 && (bbend = BB_END (prop)) 2644 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) < 2645 (simple_loop ? 
600 : 16 * 4) && get_branch_target (bbend) == 0 2646 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend))) 2647 { 2648 if (dump_file) 2649 fprintf (dump_file, "propagate from %i to %i (loop depth %i) " 2650 "for %i (loop_exit %i simple_loop %i dist %i)\n", 2651 bb->index, prop->index, bb_loop_depth (bb), 2652 INSN_UID (branch), loop_exit, simple_loop, 2653 branch_addr - INSN_ADDRESSES (INSN_UID (bbend))); 2654 2655 spu_bb_info[prop->index].prop_jump = branch; 2656 spu_bb_info[prop->index].bb_index = i; 2657 } 2658 else if (branch_addr - next_addr >= required_dist) 2659 { 2660 if (dump_file) 2661 fprintf (dump_file, "hint for %i in block %i before %i\n", 2662 INSN_UID (branch), bb->index, 2663 INSN_UID (NEXT_INSN (insn))); 2664 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target, 2665 branch_addr - next_addr, blocks); 2666 } 2667 branch = 0; 2668 } 2669 } 2670 free (spu_bb_info); 2671 2672 if (!bitmap_empty_p (blocks)) 2673 find_many_sub_basic_blocks (blocks); 2674 2675 /* We have to schedule to make sure alignment is ok. */ 2676 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE; 2677 2678 /* The hints need to be scheduled, so call it again. */ 2679 schedule_insns (); 2680 df_finish_pass (true); 2681 2682 insert_hbrp (); 2683 2684 pad_bb (); 2685 2686 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 2687 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr) 2688 { 2689 /* Adjust the LABEL_REF in a hint when we have inserted a nop 2690 between its branch label and the branch . We don't move the 2691 label because GCC expects it at the beginning of the block. */ 2692 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 2693 rtx label_ref = XVECEXP (unspec, 0, 0); 2694 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0)); 2695 rtx_insn *branch; 2696 int offset = 0; 2697 for (branch = NEXT_INSN (label); 2698 !JUMP_P (branch) && !CALL_P (branch); 2699 branch = NEXT_INSN (branch)) 2700 if (NONJUMP_INSN_P (branch)) 2701 offset += get_attr_length (branch); 2702 if (offset > 0) 2703 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset); 2704 } 2705 2706 spu_var_tracking (); 2707 2708 loop_optimizer_finalize (); 2709 2710 free_bb_for_insn (); 2711 2712 in_spu_reorg = 0; 2713} 2714 2715 2716/* Insn scheduling routines, primarily for dual issue. */ 2717static int 2718spu_sched_issue_rate (void) 2719{ 2720 return 2; 2721} 2722 2723static int 2724uses_ls_unit(rtx_insn *insn) 2725{ 2726 rtx set = single_set (insn); 2727 if (set != 0 2728 && (GET_CODE (SET_DEST (set)) == MEM 2729 || GET_CODE (SET_SRC (set)) == MEM)) 2730 return 1; 2731 return 0; 2732} 2733 2734static int 2735get_pipe (rtx_insn *insn) 2736{ 2737 enum attr_type t; 2738 /* Handle inline asm */ 2739 if (INSN_CODE (insn) == -1) 2740 return -1; 2741 t = get_attr_type (insn); 2742 switch (t) 2743 { 2744 case TYPE_CONVERT: 2745 return -2; 2746 case TYPE_MULTI0: 2747 return -1; 2748 2749 case TYPE_FX2: 2750 case TYPE_FX3: 2751 case TYPE_SPR: 2752 case TYPE_NOP: 2753 case TYPE_FXB: 2754 case TYPE_FPD: 2755 case TYPE_FP6: 2756 case TYPE_FP7: 2757 return 0; 2758 2759 case TYPE_LNOP: 2760 case TYPE_SHUF: 2761 case TYPE_LOAD: 2762 case TYPE_STORE: 2763 case TYPE_BR: 2764 case TYPE_MULTI1: 2765 case TYPE_HBR: 2766 case TYPE_IPREFETCH: 2767 return 1; 2768 default: 2769 abort (); 2770 } 2771} 2772 2773 2774/* haifa-sched.c has a static variable that keeps track of the current 2775 cycle. It is passed to spu_sched_reorder, and we record it here for 2776 use by spu_sched_variable_issue. 
It won't be accurate if the 2777 scheduler updates it's clock_var between the two calls. */ 2778static int clock_var; 2779 2780/* This is used to keep track of insn alignment. Set to 0 at the 2781 beginning of each block and increased by the "length" attr of each 2782 insn scheduled. */ 2783static int spu_sched_length; 2784 2785/* Record when we've issued pipe0 and pipe1 insns so we can reorder the 2786 ready list appropriately in spu_sched_reorder(). */ 2787static int pipe0_clock; 2788static int pipe1_clock; 2789 2790static int prev_clock_var; 2791 2792static int prev_priority; 2793 2794/* The SPU needs to load the next ilb sometime during the execution of 2795 the previous ilb. There is a potential conflict if every cycle has a 2796 load or store. To avoid the conflict we make sure the load/store 2797 unit is free for at least one cycle during the execution of insns in 2798 the previous ilb. */ 2799static int spu_ls_first; 2800static int prev_ls_clock; 2801 2802static void 2803spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, 2804 int max_ready ATTRIBUTE_UNUSED) 2805{ 2806 spu_sched_length = 0; 2807} 2808 2809static void 2810spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, 2811 int max_ready ATTRIBUTE_UNUSED) 2812{ 2813 if (align_labels > 4 || align_loops > 4 || align_jumps > 4) 2814 { 2815 /* When any block might be at least 8-byte aligned, assume they 2816 will all be at least 8-byte aligned to make sure dual issue 2817 works out correctly. */ 2818 spu_sched_length = 0; 2819 } 2820 spu_ls_first = INT_MAX; 2821 clock_var = -1; 2822 prev_ls_clock = -1; 2823 pipe0_clock = -1; 2824 pipe1_clock = -1; 2825 prev_clock_var = -1; 2826 prev_priority = -1; 2827} 2828 2829static int 2830spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED, 2831 int verbose ATTRIBUTE_UNUSED, 2832 rtx_insn *insn, int more) 2833{ 2834 int len; 2835 int p; 2836 if (GET_CODE (PATTERN (insn)) == USE 2837 || GET_CODE (PATTERN (insn)) == CLOBBER 2838 || (len = get_attr_length (insn)) == 0) 2839 return more; 2840 2841 spu_sched_length += len; 2842 2843 /* Reset on inline asm */ 2844 if (INSN_CODE (insn) == -1) 2845 { 2846 spu_ls_first = INT_MAX; 2847 pipe0_clock = -1; 2848 pipe1_clock = -1; 2849 return 0; 2850 } 2851 p = get_pipe (insn); 2852 if (p == 0) 2853 pipe0_clock = clock_var; 2854 else 2855 pipe1_clock = clock_var; 2856 2857 if (in_spu_reorg) 2858 { 2859 if (clock_var - prev_ls_clock > 1 2860 || INSN_CODE (insn) == CODE_FOR_iprefetch) 2861 spu_ls_first = INT_MAX; 2862 if (uses_ls_unit (insn)) 2863 { 2864 if (spu_ls_first == INT_MAX) 2865 spu_ls_first = spu_sched_length; 2866 prev_ls_clock = clock_var; 2867 } 2868 2869 /* The scheduler hasn't inserted the nop, but we will later on. 2870 Include those nops in spu_sched_length. */ 2871 if (prev_clock_var == clock_var && (spu_sched_length & 7)) 2872 spu_sched_length += 4; 2873 prev_clock_var = clock_var; 2874 2875 /* more is -1 when called from spu_sched_reorder for new insns 2876 that don't have INSN_PRIORITY */ 2877 if (more >= 0) 2878 prev_priority = INSN_PRIORITY (insn); 2879 } 2880 2881 /* Always try issuing more insns. spu_sched_reorder will decide 2882 when the cycle should be advanced. */ 2883 return 1; 2884} 2885 2886/* This function is called for both TARGET_SCHED_REORDER and 2887 TARGET_SCHED_REORDER2. 
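   The value returned here becomes the scheduler's can_issue_more count:
   returning 0 asks it to advance to the next cycle, while returning 1
   lets it issue the insn this hook has moved to the end of the ready
   list (haifa-sched issues from the back of the array).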
*/
static int
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                   rtx_insn **ready, int *nreadyp, int clock)
{
  int i, nready = *nreadyp;
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
  rtx_insn *insn;

  clock_var = clock;

  if (nready <= 0 || pipe1_clock >= clock)
    return 0;

  /* Find any rtl insns that don't generate assembly insns and schedule
     them first.  */
  for (i = nready - 1; i >= 0; i--)
    {
      insn = ready[i];
      if (INSN_CODE (insn) == -1
          || INSN_CODE (insn) == CODE_FOR_blockage
          || (INSN_P (insn) && get_attr_length (insn) == 0))
        {
          ready[i] = ready[nready - 1];
          ready[nready - 1] = insn;
          return 1;
        }
    }

  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
  for (i = 0; i < nready; i++)
    if (INSN_CODE (ready[i]) != -1)
      {
        insn = ready[i];
        switch (get_attr_type (insn))
          {
          default:
          case TYPE_MULTI0:
          case TYPE_CONVERT:
          case TYPE_FX2:
          case TYPE_FX3:
          case TYPE_SPR:
          case TYPE_NOP:
          case TYPE_FXB:
          case TYPE_FPD:
          case TYPE_FP6:
          case TYPE_FP7:
            pipe_0 = i;
            break;
          case TYPE_LOAD:
          case TYPE_STORE:
            pipe_ls = i;
            /* Fall through: loads and stores also issue on pipe 1.  */
          case TYPE_LNOP:
          case TYPE_SHUF:
          case TYPE_BR:
          case TYPE_MULTI1:
          case TYPE_HBR:
            pipe_1 = i;
            break;
          case TYPE_IPREFETCH:
            pipe_hbrp = i;
            break;
          }
      }

  /* In the first scheduling phase, schedule loads and stores together
     to increase the chance they will get merged during postreload CSE.  */
  if (!reload_completed && pipe_ls >= 0)
    {
      insn = ready[pipe_ls];
      ready[pipe_ls] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }

  /* If there is an hbrp ready, prefer it over other pipe 1 insns.  */
  if (pipe_hbrp >= 0)
    pipe_1 = pipe_hbrp;

  /* When we have loads/stores in every cycle of the last 15 insns and
     we are about to schedule another load/store, emit an hbrp insn
     instead.  */
  if (in_spu_reorg
      && spu_sched_length - spu_ls_first >= 4 * 15
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
    {
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
      recog_memoized (insn);
      if (pipe0_clock < clock)
        PUT_MODE (insn, TImode);
      spu_sched_variable_issue (file, verbose, insn, -1);
      return 0;
    }

  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect so -mdual-nops=n can be
     used to adjust it.  */
  if (in_spu_reorg && spu_dual_nops < 10)
    {
      /* When we are at an even address and we are not issuing nops to
         improve scheduling then we need to advance the cycle.  */
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
          && (spu_dual_nops == 0
              || (pipe_1 != -1
                  && prev_priority >
                  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
        return 0;

      /* When at an odd address, schedule the highest priority insn
         without considering pipeline.
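         That insn is ready[nready - 1] below, since the ready list is
         kept sorted with the highest priority entry last.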
*/ 2999 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock 3000 && (spu_dual_nops == 0 3001 || (prev_priority > 3002 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops))) 3003 return 1; 3004 } 3005 3006 3007 /* We haven't issued a pipe0 insn yet this cycle, if there is a 3008 pipe0 insn in the ready list, schedule it. */ 3009 if (pipe0_clock < clock && pipe_0 >= 0) 3010 schedule_i = pipe_0; 3011 3012 /* Either we've scheduled a pipe0 insn already or there is no pipe0 3013 insn to schedule. Put a pipe1 insn at the front of the ready list. */ 3014 else 3015 schedule_i = pipe_1; 3016 3017 if (schedule_i > -1) 3018 { 3019 insn = ready[schedule_i]; 3020 ready[schedule_i] = ready[nready - 1]; 3021 ready[nready - 1] = insn; 3022 return 1; 3023 } 3024 return 0; 3025} 3026 3027/* INSN is dependent on DEP_INSN. */ 3028static int 3029spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost) 3030{ 3031 rtx set; 3032 3033 /* The blockage pattern is used to prevent instructions from being 3034 moved across it and has no cost. */ 3035 if (INSN_CODE (insn) == CODE_FOR_blockage 3036 || INSN_CODE (dep_insn) == CODE_FOR_blockage) 3037 return 0; 3038 3039 if ((INSN_P (insn) && get_attr_length (insn) == 0) 3040 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0)) 3041 return 0; 3042 3043 /* Make sure hbrps are spread out. */ 3044 if (INSN_CODE (insn) == CODE_FOR_iprefetch 3045 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch) 3046 return 8; 3047 3048 /* Make sure hints and hbrps are 2 cycles apart. */ 3049 if ((INSN_CODE (insn) == CODE_FOR_iprefetch 3050 || INSN_CODE (insn) == CODE_FOR_hbr) 3051 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch 3052 || INSN_CODE (dep_insn) == CODE_FOR_hbr)) 3053 return 2; 3054 3055 /* An hbrp has no real dependency on other insns. */ 3056 if (INSN_CODE (insn) == CODE_FOR_iprefetch 3057 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch) 3058 return 0; 3059 3060 /* Assuming that it is unlikely an argument register will be used in 3061 the first cycle of the called function, we reduce the cost for 3062 slightly better scheduling of dep_insn. When not hinted, the 3063 mispredicted branch would hide the cost as well. */ 3064 if (CALL_P (insn)) 3065 { 3066 rtx target = get_branch_target (insn); 3067 if (GET_CODE (target) != REG || !set_of (target, insn)) 3068 return cost - 2; 3069 return cost; 3070 } 3071 3072 /* And when returning from a function, let's assume the return values 3073 are completed sooner too. */ 3074 if (CALL_P (dep_insn)) 3075 return cost - 2; 3076 3077 /* Make sure an instruction that loads from the back chain is schedule 3078 away from the return instruction so a hint is more likely to get 3079 issued. */ 3080 if (INSN_CODE (insn) == CODE_FOR__return 3081 && (set = single_set (dep_insn)) 3082 && GET_CODE (SET_DEST (set)) == REG 3083 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM) 3084 return 20; 3085 3086 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the 3087 scheduler makes every insn in a block anti-dependent on the final 3088 jump_insn. We adjust here so higher cost insns will get scheduled 3089 earlier. */ 3090 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI) 3091 return insn_cost (dep_insn) - 3; 3092 3093 return cost; 3094} 3095 3096/* Create a CONST_DOUBLE from a string. 
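   (Hypothetical use: spu_float_const ("0.5", SFmode).)  Going through
   REAL_VALUE_ATOF keeps the conversion in the target's float format
   instead of relying on the host's double type.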
*/ 3097rtx 3098spu_float_const (const char *string, machine_mode mode) 3099{ 3100 REAL_VALUE_TYPE value; 3101 value = REAL_VALUE_ATOF (string, mode); 3102 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode); 3103} 3104 3105int 3106spu_constant_address_p (rtx x) 3107{ 3108 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF 3109 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST 3110 || GET_CODE (x) == HIGH); 3111} 3112 3113static enum spu_immediate 3114which_immediate_load (HOST_WIDE_INT val) 3115{ 3116 gcc_assert (val == trunc_int_for_mode (val, SImode)); 3117 3118 if (val >= -0x8000 && val <= 0x7fff) 3119 return SPU_IL; 3120 if (val >= 0 && val <= 0x3ffff) 3121 return SPU_ILA; 3122 if ((val & 0xffff) == ((val >> 16) & 0xffff)) 3123 return SPU_ILH; 3124 if ((val & 0xffff) == 0) 3125 return SPU_ILHU; 3126 3127 return SPU_NONE; 3128} 3129 3130/* Return true when OP can be loaded by one of the il instructions, or 3131 when flow2 is not completed and OP can be loaded using ilhu and iohl. */ 3132int 3133immediate_load_p (rtx op, machine_mode mode) 3134{ 3135 if (CONSTANT_P (op)) 3136 { 3137 enum immediate_class c = classify_immediate (op, mode); 3138 return c == IC_IL1 || c == IC_IL1s 3139 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s)); 3140 } 3141 return 0; 3142} 3143 3144/* Return true if the first SIZE bytes of arr is a constant that can be 3145 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART 3146 represent the size and offset of the instruction to use. */ 3147static int 3148cpat_info(unsigned char *arr, int size, int *prun, int *pstart) 3149{ 3150 int cpat, run, i, start; 3151 cpat = 1; 3152 run = 0; 3153 start = -1; 3154 for (i = 0; i < size && cpat; i++) 3155 if (arr[i] != i+16) 3156 { 3157 if (!run) 3158 { 3159 start = i; 3160 if (arr[i] == 3) 3161 run = 1; 3162 else if (arr[i] == 2 && arr[i+1] == 3) 3163 run = 2; 3164 else if (arr[i] == 0) 3165 { 3166 while (arr[i+run] == run && i+run < 16) 3167 run++; 3168 if (run != 4 && run != 8) 3169 cpat = 0; 3170 } 3171 else 3172 cpat = 0; 3173 if ((i & (run-1)) != 0) 3174 cpat = 0; 3175 i += run; 3176 } 3177 else 3178 cpat = 0; 3179 } 3180 if (cpat && (run || size < 16)) 3181 { 3182 if (run == 0) 3183 run = 1; 3184 if (prun) 3185 *prun = run; 3186 if (pstart) 3187 *pstart = start == -1 ? 16-run : start; 3188 return 1; 3189 } 3190 return 0; 3191} 3192 3193/* OP is a CONSTANT_P. Determine what instructions can be used to load 3194 it into a register. MODE is only valid when OP is a CONST_INT. */ 3195static enum immediate_class 3196classify_immediate (rtx op, machine_mode mode) 3197{ 3198 HOST_WIDE_INT val; 3199 unsigned char arr[16]; 3200 int i, j, repeated, fsmbi, repeat; 3201 3202 gcc_assert (CONSTANT_P (op)); 3203 3204 if (GET_MODE (op) != VOIDmode) 3205 mode = GET_MODE (op); 3206 3207 /* A V4SI const_vector with all identical symbols is ok. */ 3208 if (!flag_pic 3209 && mode == V4SImode 3210 && GET_CODE (op) == CONST_VECTOR 3211 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT 3212 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE 3213 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) 3214 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2) 3215 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3)) 3216 op = CONST_VECTOR_ELT (op, 0); 3217 3218 switch (GET_CODE (op)) 3219 { 3220 case SYMBOL_REF: 3221 case LABEL_REF: 3222 return TARGET_LARGE_MEM ? 
IC_IL2s : IC_IL1s; 3223 3224 case CONST: 3225 /* We can never know if the resulting address fits in 18 bits and can be 3226 loaded with ila. For now, assume the address will not overflow if 3227 the displacement is "small" (fits 'K' constraint). */ 3228 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS) 3229 { 3230 rtx sym = XEXP (XEXP (op, 0), 0); 3231 rtx cst = XEXP (XEXP (op, 0), 1); 3232 3233 if (GET_CODE (sym) == SYMBOL_REF 3234 && GET_CODE (cst) == CONST_INT 3235 && satisfies_constraint_K (cst)) 3236 return IC_IL1s; 3237 } 3238 return IC_IL2s; 3239 3240 case HIGH: 3241 return IC_IL1s; 3242 3243 case CONST_VECTOR: 3244 for (i = 0; i < GET_MODE_NUNITS (mode); i++) 3245 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT 3246 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE) 3247 return IC_POOL; 3248 /* Fall through. */ 3249 3250 case CONST_INT: 3251 case CONST_DOUBLE: 3252 constant_to_array (mode, op, arr); 3253 3254 /* Check that each 4-byte slot is identical. */ 3255 repeated = 1; 3256 for (i = 4; i < 16; i += 4) 3257 for (j = 0; j < 4; j++) 3258 if (arr[j] != arr[i + j]) 3259 repeated = 0; 3260 3261 if (repeated) 3262 { 3263 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 3264 val = trunc_int_for_mode (val, SImode); 3265 3266 if (which_immediate_load (val) != SPU_NONE) 3267 return IC_IL1; 3268 } 3269 3270 /* Any mode of 2 bytes or smaller can be loaded with an il 3271 instruction. */ 3272 gcc_assert (GET_MODE_SIZE (mode) > 2); 3273 3274 fsmbi = 1; 3275 repeat = 0; 3276 for (i = 0; i < 16 && fsmbi; i++) 3277 if (arr[i] != 0 && repeat == 0) 3278 repeat = arr[i]; 3279 else if (arr[i] != 0 && arr[i] != repeat) 3280 fsmbi = 0; 3281 if (fsmbi) 3282 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2; 3283 3284 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0)) 3285 return IC_CPAT; 3286 3287 if (repeated) 3288 return IC_IL2; 3289 3290 return IC_POOL; 3291 default: 3292 break; 3293 } 3294 gcc_unreachable (); 3295} 3296 3297static enum spu_immediate 3298which_logical_immediate (HOST_WIDE_INT val) 3299{ 3300 gcc_assert (val == trunc_int_for_mode (val, SImode)); 3301 3302 if (val >= -0x200 && val <= 0x1ff) 3303 return SPU_ORI; 3304 if (val >= 0 && val <= 0xffff) 3305 return SPU_IOHL; 3306 if ((val & 0xffff) == ((val >> 16) & 0xffff)) 3307 { 3308 val = trunc_int_for_mode (val, HImode); 3309 if (val >= -0x200 && val <= 0x1ff) 3310 return SPU_ORHI; 3311 if ((val & 0xff) == ((val >> 8) & 0xff)) 3312 { 3313 val = trunc_int_for_mode (val, QImode); 3314 if (val >= -0x200 && val <= 0x1ff) 3315 return SPU_ORBI; 3316 } 3317 } 3318 return SPU_NONE; 3319} 3320 3321/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or 3322 CONST_DOUBLEs. */ 3323static int 3324const_vector_immediate_p (rtx x) 3325{ 3326 int i; 3327 gcc_assert (GET_CODE (x) == CONST_VECTOR); 3328 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++) 3329 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT 3330 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE) 3331 return 0; 3332 return 1; 3333} 3334 3335int 3336logical_immediate_p (rtx op, machine_mode mode) 3337{ 3338 HOST_WIDE_INT val; 3339 unsigned char arr[16]; 3340 int i, j; 3341 3342 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3343 || GET_CODE (op) == CONST_VECTOR); 3344 3345 if (GET_CODE (op) == CONST_VECTOR 3346 && !const_vector_immediate_p (op)) 3347 return 0; 3348 3349 if (GET_MODE (op) != VOIDmode) 3350 mode = GET_MODE (op); 3351 3352 constant_to_array (mode, op, arr); 3353 3354 /* Check that bytes are repeated. 
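     For example, a V8HI splat of 0x1234 becomes the byte pattern
     12 34 12 34 ... and passes, because every 4-byte group matches the
     first one; a vector mixing two different halfword values fails here
     and is rejected.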
*/ 3355 for (i = 4; i < 16; i += 4) 3356 for (j = 0; j < 4; j++) 3357 if (arr[j] != arr[i + j]) 3358 return 0; 3359 3360 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 3361 val = trunc_int_for_mode (val, SImode); 3362 3363 i = which_logical_immediate (val); 3364 return i != SPU_NONE && i != SPU_IOHL; 3365} 3366 3367int 3368iohl_immediate_p (rtx op, machine_mode mode) 3369{ 3370 HOST_WIDE_INT val; 3371 unsigned char arr[16]; 3372 int i, j; 3373 3374 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3375 || GET_CODE (op) == CONST_VECTOR); 3376 3377 if (GET_CODE (op) == CONST_VECTOR 3378 && !const_vector_immediate_p (op)) 3379 return 0; 3380 3381 if (GET_MODE (op) != VOIDmode) 3382 mode = GET_MODE (op); 3383 3384 constant_to_array (mode, op, arr); 3385 3386 /* Check that bytes are repeated. */ 3387 for (i = 4; i < 16; i += 4) 3388 for (j = 0; j < 4; j++) 3389 if (arr[j] != arr[i + j]) 3390 return 0; 3391 3392 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 3393 val = trunc_int_for_mode (val, SImode); 3394 3395 return val >= 0 && val <= 0xffff; 3396} 3397 3398int 3399arith_immediate_p (rtx op, machine_mode mode, 3400 HOST_WIDE_INT low, HOST_WIDE_INT high) 3401{ 3402 HOST_WIDE_INT val; 3403 unsigned char arr[16]; 3404 int bytes, i, j; 3405 3406 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3407 || GET_CODE (op) == CONST_VECTOR); 3408 3409 if (GET_CODE (op) == CONST_VECTOR 3410 && !const_vector_immediate_p (op)) 3411 return 0; 3412 3413 if (GET_MODE (op) != VOIDmode) 3414 mode = GET_MODE (op); 3415 3416 constant_to_array (mode, op, arr); 3417 3418 if (VECTOR_MODE_P (mode)) 3419 mode = GET_MODE_INNER (mode); 3420 3421 bytes = GET_MODE_SIZE (mode); 3422 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); 3423 3424 /* Check that bytes are repeated. */ 3425 for (i = bytes; i < 16; i += bytes) 3426 for (j = 0; j < bytes; j++) 3427 if (arr[j] != arr[i + j]) 3428 return 0; 3429 3430 val = arr[0]; 3431 for (j = 1; j < bytes; j++) 3432 val = (val << 8) | arr[j]; 3433 3434 val = trunc_int_for_mode (val, mode); 3435 3436 return val >= low && val <= high; 3437} 3438 3439/* TRUE when op is an immediate and an exact power of 2, and given that 3440 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector, 3441 all entries must be the same. */ 3442bool 3443exp2_immediate_p (rtx op, machine_mode mode, int low, int high) 3444{ 3445 machine_mode int_mode; 3446 HOST_WIDE_INT val; 3447 unsigned char arr[16]; 3448 int bytes, i, j; 3449 3450 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3451 || GET_CODE (op) == CONST_VECTOR); 3452 3453 if (GET_CODE (op) == CONST_VECTOR 3454 && !const_vector_immediate_p (op)) 3455 return 0; 3456 3457 if (GET_MODE (op) != VOIDmode) 3458 mode = GET_MODE (op); 3459 3460 constant_to_array (mode, op, arr); 3461 3462 if (VECTOR_MODE_P (mode)) 3463 mode = GET_MODE_INNER (mode); 3464 3465 bytes = GET_MODE_SIZE (mode); 3466 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); 3467 3468 /* Check that bytes are repeated. 
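     A worked SFmode example for the power-of-two test further down:
     2.0f has the bit pattern 0x40000000, so its mantissa bits are all
     zero and (val >> 23) - 127 gives a scale of 1, which is accepted
     whenever low <= 1 <= high.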
*/ 3469 for (i = bytes; i < 16; i += bytes) 3470 for (j = 0; j < bytes; j++) 3471 if (arr[j] != arr[i + j]) 3472 return 0; 3473 3474 val = arr[0]; 3475 for (j = 1; j < bytes; j++) 3476 val = (val << 8) | arr[j]; 3477 3478 val = trunc_int_for_mode (val, int_mode); 3479 3480 /* Currently, we only handle SFmode */ 3481 gcc_assert (mode == SFmode); 3482 if (mode == SFmode) 3483 { 3484 int exp = (val >> 23) - 127; 3485 return val > 0 && (val & 0x007fffff) == 0 3486 && exp >= low && exp <= high; 3487 } 3488 return FALSE; 3489} 3490 3491/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */ 3492 3493static bool 3494ea_symbol_ref_p (const_rtx x) 3495{ 3496 tree decl; 3497 3498 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 3499 { 3500 rtx plus = XEXP (x, 0); 3501 rtx op0 = XEXP (plus, 0); 3502 rtx op1 = XEXP (plus, 1); 3503 if (GET_CODE (op1) == CONST_INT) 3504 x = op0; 3505 } 3506 3507 return (GET_CODE (x) == SYMBOL_REF 3508 && (decl = SYMBOL_REF_DECL (x)) != 0 3509 && TREE_CODE (decl) == VAR_DECL 3510 && TYPE_ADDR_SPACE (TREE_TYPE (decl))); 3511} 3512 3513/* We accept: 3514 - any 32-bit constant (SImode, SFmode) 3515 - any constant that can be generated with fsmbi (any mode) 3516 - a 64-bit constant where the high and low bits are identical 3517 (DImode, DFmode) 3518 - a 128-bit constant where the four 32-bit words match. */ 3519bool 3520spu_legitimate_constant_p (machine_mode mode, rtx x) 3521{ 3522 subrtx_iterator::array_type array; 3523 if (GET_CODE (x) == HIGH) 3524 x = XEXP (x, 0); 3525 3526 /* Reject any __ea qualified reference. These can't appear in 3527 instructions but must be forced to the constant pool. */ 3528 FOR_EACH_SUBRTX (iter, array, x, ALL) 3529 if (ea_symbol_ref_p (*iter)) 3530 return 0; 3531 3532 /* V4SI with all identical symbols is valid. */ 3533 if (!flag_pic 3534 && mode == V4SImode 3535 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF 3536 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF 3537 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST)) 3538 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1) 3539 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2) 3540 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3); 3541 3542 if (GET_CODE (x) == CONST_VECTOR 3543 && !const_vector_immediate_p (x)) 3544 return 0; 3545 return 1; 3546} 3547 3548/* Valid address are: 3549 - symbol_ref, label_ref, const 3550 - reg 3551 - reg + const_int, where const_int is 16 byte aligned 3552 - reg + reg, alignment doesn't matter 3553 The alignment matters in the reg+const case because lqd and stqd 3554 ignore the 4 least significant bits of the const. We only care about 3555 16 byte modes because the expand phase will change all smaller MEM 3556 references to TImode. */ 3557static bool 3558spu_legitimate_address_p (machine_mode mode, 3559 rtx x, bool reg_ok_strict) 3560{ 3561 int aligned = GET_MODE_SIZE (mode) >= 16; 3562 if (aligned 3563 && GET_CODE (x) == AND 3564 && GET_CODE (XEXP (x, 1)) == CONST_INT 3565 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16) 3566 x = XEXP (x, 0); 3567 switch (GET_CODE (x)) 3568 { 3569 case LABEL_REF: 3570 return !TARGET_LARGE_MEM; 3571 3572 case SYMBOL_REF: 3573 case CONST: 3574 /* Keep __ea references until reload so that spu_expand_mov can see them 3575 in MEMs. 
*/ 3576 if (ea_symbol_ref_p (x)) 3577 return !reload_in_progress && !reload_completed; 3578 return !TARGET_LARGE_MEM; 3579 3580 case CONST_INT: 3581 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; 3582 3583 case SUBREG: 3584 x = XEXP (x, 0); 3585 if (REG_P (x)) 3586 return 0; 3587 3588 case REG: 3589 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict); 3590 3591 case PLUS: 3592 case LO_SUM: 3593 { 3594 rtx op0 = XEXP (x, 0); 3595 rtx op1 = XEXP (x, 1); 3596 if (GET_CODE (op0) == SUBREG) 3597 op0 = XEXP (op0, 0); 3598 if (GET_CODE (op1) == SUBREG) 3599 op1 = XEXP (op1, 0); 3600 if (GET_CODE (op0) == REG 3601 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) 3602 && GET_CODE (op1) == CONST_INT 3603 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff) 3604 /* If virtual registers are involved, the displacement will 3605 change later on anyway, so checking would be premature. 3606 Reload will make sure the final displacement after 3607 register elimination is OK. */ 3608 || op0 == arg_pointer_rtx 3609 || op0 == frame_pointer_rtx 3610 || op0 == virtual_stack_vars_rtx) 3611 && (!aligned || (INTVAL (op1) & 15) == 0)) 3612 return TRUE; 3613 if (GET_CODE (op0) == REG 3614 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) 3615 && GET_CODE (op1) == REG 3616 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)) 3617 return TRUE; 3618 } 3619 break; 3620 3621 default: 3622 break; 3623 } 3624 return FALSE; 3625} 3626 3627/* Like spu_legitimate_address_p, except with named addresses. */ 3628static bool 3629spu_addr_space_legitimate_address_p (machine_mode mode, rtx x, 3630 bool reg_ok_strict, addr_space_t as) 3631{ 3632 if (as == ADDR_SPACE_EA) 3633 return (REG_P (x) && (GET_MODE (x) == EAmode)); 3634 3635 else if (as != ADDR_SPACE_GENERIC) 3636 gcc_unreachable (); 3637 3638 return spu_legitimate_address_p (mode, x, reg_ok_strict); 3639} 3640 3641/* When the address is reg + const_int, force the const_int into a 3642 register. */ 3643static rtx 3644spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 3645 machine_mode mode ATTRIBUTE_UNUSED) 3646{ 3647 rtx op0, op1; 3648 /* Make sure both operands are registers. */ 3649 if (GET_CODE (x) == PLUS) 3650 { 3651 op0 = XEXP (x, 0); 3652 op1 = XEXP (x, 1); 3653 if (ALIGNED_SYMBOL_REF_P (op0)) 3654 { 3655 op0 = force_reg (Pmode, op0); 3656 mark_reg_pointer (op0, 128); 3657 } 3658 else if (GET_CODE (op0) != REG) 3659 op0 = force_reg (Pmode, op0); 3660 if (ALIGNED_SYMBOL_REF_P (op1)) 3661 { 3662 op1 = force_reg (Pmode, op1); 3663 mark_reg_pointer (op1, 128); 3664 } 3665 else if (GET_CODE (op1) != REG) 3666 op1 = force_reg (Pmode, op1); 3667 x = gen_rtx_PLUS (Pmode, op0, op1); 3668 } 3669 return x; 3670} 3671 3672/* Like spu_legitimate_address, except with named address support. */ 3673static rtx 3674spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode, 3675 addr_space_t as) 3676{ 3677 if (as != ADDR_SPACE_GENERIC) 3678 return x; 3679 3680 return spu_legitimize_address (x, oldx, mode); 3681} 3682 3683/* Reload reg + const_int for out-of-range displacements. 
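   For instance (a made-up operand), (plus (reg) (const_int 0x3000)) is
   outside the signed -0x2000 .. 0x1fff range that a single load/store
   displacement can encode, so the constant is pushed into a register by
   the reload below and the address becomes a reg + reg form.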
*/ 3684rtx 3685spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED, 3686 int opnum, int type) 3687{ 3688 bool removed_and = false; 3689 3690 if (GET_CODE (ad) == AND 3691 && CONST_INT_P (XEXP (ad, 1)) 3692 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16) 3693 { 3694 ad = XEXP (ad, 0); 3695 removed_and = true; 3696 } 3697 3698 if (GET_CODE (ad) == PLUS 3699 && REG_P (XEXP (ad, 0)) 3700 && CONST_INT_P (XEXP (ad, 1)) 3701 && !(INTVAL (XEXP (ad, 1)) >= -0x2000 3702 && INTVAL (XEXP (ad, 1)) <= 0x1fff)) 3703 { 3704 /* Unshare the sum. */ 3705 ad = copy_rtx (ad); 3706 3707 /* Reload the displacement. */ 3708 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL, 3709 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0, 3710 opnum, (enum reload_type) type); 3711 3712 /* Add back AND for alignment if we stripped it. */ 3713 if (removed_and) 3714 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16)); 3715 3716 return ad; 3717 } 3718 3719 return NULL_RTX; 3720} 3721 3722/* Handle an attribute requiring a FUNCTION_DECL; arguments as in 3723 struct attribute_spec.handler. */ 3724static tree 3725spu_handle_fndecl_attribute (tree * node, 3726 tree name, 3727 tree args ATTRIBUTE_UNUSED, 3728 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) 3729{ 3730 if (TREE_CODE (*node) != FUNCTION_DECL) 3731 { 3732 warning (0, "%qE attribute only applies to functions", 3733 name); 3734 *no_add_attrs = true; 3735 } 3736 3737 return NULL_TREE; 3738} 3739 3740/* Handle the "vector" attribute. */ 3741static tree 3742spu_handle_vector_attribute (tree * node, tree name, 3743 tree args ATTRIBUTE_UNUSED, 3744 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) 3745{ 3746 tree type = *node, result = NULL_TREE; 3747 machine_mode mode; 3748 int unsigned_p; 3749 3750 while (POINTER_TYPE_P (type) 3751 || TREE_CODE (type) == FUNCTION_TYPE 3752 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE) 3753 type = TREE_TYPE (type); 3754 3755 mode = TYPE_MODE (type); 3756 3757 unsigned_p = TYPE_UNSIGNED (type); 3758 switch (mode) 3759 { 3760 case DImode: 3761 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); 3762 break; 3763 case SImode: 3764 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); 3765 break; 3766 case HImode: 3767 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); 3768 break; 3769 case QImode: 3770 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); 3771 break; 3772 case SFmode: 3773 result = V4SF_type_node; 3774 break; 3775 case DFmode: 3776 result = V2DF_type_node; 3777 break; 3778 default: 3779 break; 3780 } 3781 3782 /* Propagate qualifiers attached to the element type 3783 onto the vector type. */ 3784 if (result && result != type && TYPE_QUALS (type)) 3785 result = build_qualified_type (result, TYPE_QUALS (type)); 3786 3787 *no_add_attrs = true; /* No need to hang on to the attribute. */ 3788 3789 if (!result) 3790 warning (0, "%qE attribute ignored", name); 3791 else 3792 *node = lang_hooks.types.reconstruct_complex_type (*node, result); 3793 3794 return NULL_TREE; 3795} 3796 3797/* Return nonzero if FUNC is a naked function. 
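   That is, one carrying the "naked" attribute, e.g. the hypothetical
   declaration
       void f (void) __attribute__ ((naked));
   which conventionally asks the compiler not to emit prologue or
   epilogue code for the function.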
*/ 3798static int 3799spu_naked_function_p (tree func) 3800{ 3801 tree a; 3802 3803 if (TREE_CODE (func) != FUNCTION_DECL) 3804 abort (); 3805 3806 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); 3807 return a != NULL_TREE; 3808} 3809 3810int 3811spu_initial_elimination_offset (int from, int to) 3812{ 3813 int saved_regs_size = spu_saved_regs_size (); 3814 int sp_offset = 0; 3815 if (!crtl->is_leaf || crtl->outgoing_args_size 3816 || get_frame_size () || saved_regs_size) 3817 sp_offset = STACK_POINTER_OFFSET; 3818 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 3819 return get_frame_size () + crtl->outgoing_args_size + sp_offset; 3820 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3821 return get_frame_size (); 3822 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 3823 return sp_offset + crtl->outgoing_args_size 3824 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET; 3825 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3826 return get_frame_size () + saved_regs_size + sp_offset; 3827 else 3828 gcc_unreachable (); 3829} 3830 3831rtx 3832spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED) 3833{ 3834 machine_mode mode = TYPE_MODE (type); 3835 int byte_size = ((mode == BLKmode) 3836 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3837 3838 /* Make sure small structs are left justified in a register. */ 3839 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 3840 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0) 3841 { 3842 machine_mode smode; 3843 rtvec v; 3844 int i; 3845 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 3846 int n = byte_size / UNITS_PER_WORD; 3847 v = rtvec_alloc (nregs); 3848 for (i = 0; i < n; i++) 3849 { 3850 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode, 3851 gen_rtx_REG (TImode, 3852 FIRST_RETURN_REGNUM 3853 + i), 3854 GEN_INT (UNITS_PER_WORD * i)); 3855 byte_size -= UNITS_PER_WORD; 3856 } 3857 3858 if (n < nregs) 3859 { 3860 if (byte_size < 4) 3861 byte_size = 4; 3862 smode = 3863 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); 3864 RTVEC_ELT (v, n) = 3865 gen_rtx_EXPR_LIST (VOIDmode, 3866 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n), 3867 GEN_INT (UNITS_PER_WORD * n)); 3868 } 3869 return gen_rtx_PARALLEL (mode, v); 3870 } 3871 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM); 3872} 3873 3874static rtx 3875spu_function_arg (cumulative_args_t cum_v, 3876 machine_mode mode, 3877 const_tree type, bool named ATTRIBUTE_UNUSED) 3878{ 3879 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 3880 int byte_size; 3881 3882 if (*cum >= MAX_REGISTER_ARGS) 3883 return 0; 3884 3885 byte_size = ((mode == BLKmode) 3886 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3887 3888 /* The ABI does not allow parameters to be passed partially in 3889 reg and partially in stack. */ 3890 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS) 3891 return 0; 3892 3893 /* Make sure small structs are left justified in a register. 
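     For example, a hypothetical 6-byte struct is widened to an 8-byte
     integer mode and placed at offset 0 of its argument register, so
     its bytes occupy the left (most significant) end of the 16-byte
     register instead of being right justified.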
*/ 3894 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 3895 && byte_size < UNITS_PER_WORD && byte_size > 0) 3896 { 3897 machine_mode smode; 3898 rtx gr_reg; 3899 if (byte_size < 4) 3900 byte_size = 4; 3901 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); 3902 gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3903 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum), 3904 const0_rtx); 3905 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); 3906 } 3907 else 3908 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum); 3909} 3910 3911static void 3912spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 3913 const_tree type, bool named ATTRIBUTE_UNUSED) 3914{ 3915 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 3916 3917 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST 3918 ? 1 3919 : mode == BLKmode 3920 ? ((int_size_in_bytes (type) + 15) / 16) 3921 : mode == VOIDmode 3922 ? 1 3923 : HARD_REGNO_NREGS (cum, mode)); 3924} 3925 3926/* Variable sized types are passed by reference. */ 3927static bool 3928spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, 3929 machine_mode mode ATTRIBUTE_UNUSED, 3930 const_tree type, bool named ATTRIBUTE_UNUSED) 3931{ 3932 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; 3933} 3934 3935 3936/* Var args. */ 3937 3938/* Create and return the va_list datatype. 3939 3940 On SPU, va_list is an array type equivalent to 3941 3942 typedef struct __va_list_tag 3943 { 3944 void *__args __attribute__((__aligned(16))); 3945 void *__skip __attribute__((__aligned(16))); 3946 3947 } va_list[1]; 3948 3949 where __args points to the arg that will be returned by the next 3950 va_arg(), and __skip points to the previous stack frame such that 3951 when __args == __skip we should advance __args by 32 bytes. */ 3952static tree 3953spu_build_builtin_va_list (void) 3954{ 3955 tree f_args, f_skip, record, type_decl; 3956 bool owp; 3957 3958 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 3959 3960 type_decl = 3961 build_decl (BUILTINS_LOCATION, 3962 TYPE_DECL, get_identifier ("__va_list_tag"), record); 3963 3964 f_args = build_decl (BUILTINS_LOCATION, 3965 FIELD_DECL, get_identifier ("__args"), ptr_type_node); 3966 f_skip = build_decl (BUILTINS_LOCATION, 3967 FIELD_DECL, get_identifier ("__skip"), ptr_type_node); 3968 3969 DECL_FIELD_CONTEXT (f_args) = record; 3970 DECL_ALIGN (f_args) = 128; 3971 DECL_USER_ALIGN (f_args) = 1; 3972 3973 DECL_FIELD_CONTEXT (f_skip) = record; 3974 DECL_ALIGN (f_skip) = 128; 3975 DECL_USER_ALIGN (f_skip) = 1; 3976 3977 TYPE_STUB_DECL (record) = type_decl; 3978 TYPE_NAME (record) = type_decl; 3979 TYPE_FIELDS (record) = f_args; 3980 DECL_CHAIN (f_args) = f_skip; 3981 3982 /* We know this is being padded and we want it too. It is an internal 3983 type so hide the warnings from the user. */ 3984 owp = warn_padded; 3985 warn_padded = false; 3986 3987 layout_type (record); 3988 3989 warn_padded = owp; 3990 3991 /* The correct type is an array type of one element. */ 3992 return build_array_type (record, build_index_type (size_zero_node)); 3993} 3994 3995/* Implement va_start by filling the va_list structure VALIST. 3996 NEXTARG points to the first anonymous stack argument. 3997 3998 The following global variables are used to initialize 3999 the va_list structure: 4000 4001 crtl->args.info; 4002 the CUMULATIVE_ARGS for this function 4003 4004 crtl->args.arg_offset_rtx: 4005 holds the offset of the first anonymous stack argument 4006 (relative to the virtual arg pointer). 
*/ 4007 4008static void 4009spu_va_start (tree valist, rtx nextarg) 4010{ 4011 tree f_args, f_skip; 4012 tree args, skip, t; 4013 4014 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4015 f_skip = DECL_CHAIN (f_args); 4016 4017 valist = build_simple_mem_ref (valist); 4018 args = 4019 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE); 4020 skip = 4021 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE); 4022 4023 /* Find the __args area. */ 4024 t = make_tree (TREE_TYPE (args), nextarg); 4025 if (crtl->args.pretend_args_size > 0) 4026 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET); 4027 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t); 4028 TREE_SIDE_EFFECTS (t) = 1; 4029 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4030 4031 /* Find the __skip area. */ 4032 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx); 4033 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size 4034 - STACK_POINTER_OFFSET)); 4035 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t); 4036 TREE_SIDE_EFFECTS (t) = 1; 4037 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4038} 4039 4040/* Gimplify va_arg by updating the va_list structure 4041 VALIST as required to retrieve an argument of type 4042 TYPE, and returning that argument. 4043 4044 ret = va_arg(VALIST, TYPE); 4045 4046 generates code equivalent to: 4047 4048 paddedsize = (sizeof(TYPE) + 15) & -16; 4049 if (VALIST.__args + paddedsize > VALIST.__skip 4050 && VALIST.__args <= VALIST.__skip) 4051 addr = VALIST.__skip + 32; 4052 else 4053 addr = VALIST.__args; 4054 VALIST.__args = addr + paddedsize; 4055 ret = *(TYPE *)addr; 4056 */ 4057static tree 4058spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p, 4059 gimple_seq * post_p ATTRIBUTE_UNUSED) 4060{ 4061 tree f_args, f_skip; 4062 tree args, skip; 4063 HOST_WIDE_INT size, rsize; 4064 tree addr, tmp; 4065 bool pass_by_reference_p; 4066 4067 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4068 f_skip = DECL_CHAIN (f_args); 4069 4070 valist = build_simple_mem_ref (valist); 4071 args = 4072 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE); 4073 skip = 4074 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE); 4075 4076 addr = create_tmp_var (ptr_type_node, "va_arg"); 4077 4078 /* if an object is dynamically sized, a pointer to it is passed 4079 instead of the object itself. */ 4080 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, 4081 false); 4082 if (pass_by_reference_p) 4083 type = build_pointer_type (type); 4084 size = int_size_in_bytes (type); 4085 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD; 4086 4087 /* build conditional expression to calculate addr. The expression 4088 will be gimplified later. 
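For a 4-byte int, for instance, paddedsize is 16 and the generated expression amounts to: addr = (__args + 16 > __skip && __args <= __skip) ? __skip + 32 : __args.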
*/ 4089 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize); 4090 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node, 4091 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)), 4092 build2 (LE_EXPR, boolean_type_node, unshare_expr (args), 4093 unshare_expr (skip))); 4094 4095 tmp = build3 (COND_EXPR, ptr_type_node, tmp, 4096 fold_build_pointer_plus_hwi (unshare_expr (skip), 32), 4097 unshare_expr (args)); 4098 4099 gimplify_assign (addr, tmp, pre_p); 4100 4101 /* update VALIST.__args */ 4102 tmp = fold_build_pointer_plus_hwi (addr, rsize); 4103 gimplify_assign (unshare_expr (args), tmp, pre_p); 4104 4105 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true), 4106 addr); 4107 4108 if (pass_by_reference_p) 4109 addr = build_va_arg_indirect_ref (addr); 4110 4111 return build_va_arg_indirect_ref (addr); 4112} 4113 4114/* Save parameter registers starting with the register that corresponds 4115 to the first unnamed parameters. If the first unnamed parameter is 4116 in the stack then save no registers. Set pretend_args_size to the 4117 amount of space needed to save the registers. */ 4118static void 4119spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, 4120 tree type, int *pretend_size, int no_rtl) 4121{ 4122 if (!no_rtl) 4123 { 4124 rtx tmp; 4125 int regno; 4126 int offset; 4127 int ncum = *get_cumulative_args (cum); 4128 4129 /* cum currently points to the last named argument, we want to 4130 start at the next argument. */ 4131 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true); 4132 4133 offset = -STACK_POINTER_OFFSET; 4134 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++) 4135 { 4136 tmp = gen_frame_mem (V4SImode, 4137 plus_constant (Pmode, virtual_incoming_args_rtx, 4138 offset)); 4139 emit_move_insn (tmp, 4140 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno)); 4141 offset += 16; 4142 } 4143 *pretend_size = offset + STACK_POINTER_OFFSET; 4144 } 4145} 4146 4147static void 4148spu_conditional_register_usage (void) 4149{ 4150 if (flag_pic) 4151 { 4152 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 4153 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 4154 } 4155} 4156 4157/* This is called any time we inspect the alignment of a register for 4158 addresses. */ 4159static int 4160reg_aligned_for_addr (rtx x) 4161{ 4162 int regno = 4163 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x); 4164 return REGNO_POINTER_ALIGN (regno) >= 128; 4165} 4166 4167/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF 4168 into its SYMBOL_REF_FLAGS. */ 4169static void 4170spu_encode_section_info (tree decl, rtx rtl, int first) 4171{ 4172 default_encode_section_info (decl, rtl, first); 4173 4174 /* If a variable has a forced alignment to < 16 bytes, mark it with 4175 SYMBOL_FLAG_ALIGN1. */ 4176 if (TREE_CODE (decl) == VAR_DECL 4177 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128) 4178 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1; 4179} 4180 4181/* Return TRUE if we are certain the mem refers to a complete object 4182 which is both 16-byte aligned and padded to a 16-byte boundary. This 4183 would make it safe to store with a single instruction. 4184 We guarantee the alignment and padding for static objects by aligning 4185 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.) 
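(For example, a lone global short still gets a 16-byte aligned, 16-byte padded slot of its own, so a quadword store to it cannot clobber a neighbouring object.)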
4186 FIXME: We currently cannot guarantee this for objects on the stack 4187 because assign_parm_setup_stack calls assign_stack_local with the 4188 alignment of the parameter mode and in that case the alignment never 4189 gets adjusted by LOCAL_ALIGNMENT. */ 4190static int 4191store_with_one_insn_p (rtx mem) 4192{ 4193 machine_mode mode = GET_MODE (mem); 4194 rtx addr = XEXP (mem, 0); 4195 if (mode == BLKmode) 4196 return 0; 4197 if (GET_MODE_SIZE (mode) >= 16) 4198 return 1; 4199 /* Only static objects. */ 4200 if (GET_CODE (addr) == SYMBOL_REF) 4201 { 4202 /* We use the associated declaration to make sure the access is 4203 referring to the whole object. 4204 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure 4205 if it is necessary. Will there be cases where one exists, and 4206 the other does not? Will there be cases where both exist, but 4207 have different types? */ 4208 tree decl = MEM_EXPR (mem); 4209 if (decl 4210 && TREE_CODE (decl) == VAR_DECL 4211 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl))) 4212 return 1; 4213 decl = SYMBOL_REF_DECL (addr); 4214 if (decl 4215 && TREE_CODE (decl) == VAR_DECL 4216 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl))) 4217 return 1; 4218 } 4219 return 0; 4220} 4221 4222/* Return 1 when the address is not valid for a simple load and store as 4223 required by the '_mov*' patterns. We could make this less strict 4224 for loads, but we prefer mem's to look the same so they are more 4225 likely to be merged. */ 4226static int 4227address_needs_split (rtx mem) 4228{ 4229 if (GET_MODE_SIZE (GET_MODE (mem)) < 16 4230 && (GET_MODE_SIZE (GET_MODE (mem)) < 4 4231 || !(store_with_one_insn_p (mem) 4232 || mem_is_padded_component_ref (mem)))) 4233 return 1; 4234 4235 return 0; 4236} 4237 4238static GTY(()) rtx cache_fetch; /* __cache_fetch function */ 4239static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */ 4240static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */ 4241 4242/* MEM is known to be an __ea qualified memory access. Emit a call to 4243 fetch the ppu memory to local store, and return its address in local 4244 store. */ 4245 4246static void 4247ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr) 4248{ 4249 if (is_store) 4250 { 4251 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem))); 4252 if (!cache_fetch_dirty) 4253 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty"); 4254 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode, 4255 2, ea_addr, EAmode, ndirty, SImode); 4256 } 4257 else 4258 { 4259 if (!cache_fetch) 4260 cache_fetch = init_one_libfunc ("__cache_fetch"); 4261 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode, 4262 1, ea_addr, EAmode); 4263 } 4264} 4265 4266/* Like ea_load_store, but do the cache tag comparison and, for stores, 4267 dirty bit marking, inline. 
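In outline, the code below indexes the tag array with the low bits of the __ea address, compares the address against all four tags of that line with a single vector compare, and falls back to the __cache_fetch library call only on a miss.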
4268 4269 The cache control data structure is an array of 4270 4271 struct __cache_tag_array 4272 { 4273 unsigned int tag_lo[4]; 4274 unsigned int tag_hi[4]; 4275 void *data_pointer[4]; 4276 int reserved[4]; 4277 vector unsigned short dirty_bits[4]; 4278 } */ 4279 4280static void 4281ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr) 4282{ 4283 rtx ea_addr_si; 4284 HOST_WIDE_INT v; 4285 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size"); 4286 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array"); 4287 rtx index_mask = gen_reg_rtx (SImode); 4288 rtx tag_arr = gen_reg_rtx (Pmode); 4289 rtx splat_mask = gen_reg_rtx (TImode); 4290 rtx splat = gen_reg_rtx (V4SImode); 4291 rtx splat_hi = NULL_RTX; 4292 rtx tag_index = gen_reg_rtx (Pmode); 4293 rtx block_off = gen_reg_rtx (SImode); 4294 rtx tag_addr = gen_reg_rtx (Pmode); 4295 rtx tag = gen_reg_rtx (V4SImode); 4296 rtx cache_tag = gen_reg_rtx (V4SImode); 4297 rtx cache_tag_hi = NULL_RTX; 4298 rtx cache_ptrs = gen_reg_rtx (TImode); 4299 rtx cache_ptrs_si = gen_reg_rtx (SImode); 4300 rtx tag_equal = gen_reg_rtx (V4SImode); 4301 rtx tag_equal_hi = NULL_RTX; 4302 rtx tag_eq_pack = gen_reg_rtx (V4SImode); 4303 rtx tag_eq_pack_si = gen_reg_rtx (SImode); 4304 rtx eq_index = gen_reg_rtx (SImode); 4305 rtx bcomp, hit_label, hit_ref, cont_label; 4306 rtx_insn *insn; 4307 4308 if (spu_ea_model != 32) 4309 { 4310 splat_hi = gen_reg_rtx (V4SImode); 4311 cache_tag_hi = gen_reg_rtx (V4SImode); 4312 tag_equal_hi = gen_reg_rtx (V4SImode); 4313 } 4314 4315 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128)); 4316 emit_move_insn (tag_arr, tag_arr_sym); 4317 v = 0x0001020300010203LL; 4318 emit_move_insn (splat_mask, immed_double_const (v, v, TImode)); 4319 ea_addr_si = ea_addr; 4320 if (spu_ea_model != 32) 4321 ea_addr_si = convert_to_mode (SImode, ea_addr, 1); 4322 4323 /* tag_index = ea_addr & (tag_array_size - 128) */ 4324 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask)); 4325 4326 /* splat ea_addr to all 4 slots. */ 4327 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask)); 4328 /* Similarly for high 32 bits of ea_addr. */ 4329 if (spu_ea_model != 32) 4330 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask)); 4331 4332 /* block_off = ea_addr & 127 */ 4333 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127))); 4334 4335 /* tag_addr = tag_arr + tag_index */ 4336 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index)); 4337 4338 /* Read cache tags. */ 4339 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr)); 4340 if (spu_ea_model != 32) 4341 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode, 4342 plus_constant (Pmode, 4343 tag_addr, 16))); 4344 4345 /* tag = ea_addr & -128 */ 4346 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128))); 4347 4348 /* Read all four cache data pointers. */ 4349 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode, 4350 plus_constant (Pmode, 4351 tag_addr, 32))); 4352 4353 /* Compare tags. */ 4354 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag)); 4355 if (spu_ea_model != 32) 4356 { 4357 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi)); 4358 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi)); 4359 } 4360 4361 /* At most one of the tags compare equal, so tag_equal has one 4362 32-bit slot set to all 1's, with the other slots all zero. 
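For instance, a hit in slot 2 leaves tag_equal = { 0, 0, 0xffffffff, 0 }.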
4363 gbb picks off low bit from each byte in the 128-bit registers, 4364 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming 4365 we have a hit. */ 4366 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal))); 4367 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack)); 4368 4369 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */ 4370 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si)); 4371 4372 /* Allowing us to rotate the corresponding cache data pointer to slot0. 4373 (rotating eq_index mod 16 bytes). */ 4374 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index)); 4375 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs)); 4376 4377 /* Add block offset to form final data address. */ 4378 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off)); 4379 4380 /* Check that we did hit. */ 4381 hit_label = gen_label_rtx (); 4382 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label); 4383 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx); 4384 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 4385 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, 4386 hit_ref, pc_rtx))); 4387 /* Say that this branch is very likely to happen. */ 4388 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1; 4389 add_int_reg_note (insn, REG_BR_PROB, v); 4390 4391 ea_load_store (mem, is_store, ea_addr, data_addr); 4392 cont_label = gen_label_rtx (); 4393 emit_jump_insn (gen_jump (cont_label)); 4394 emit_barrier (); 4395 4396 emit_label (hit_label); 4397 4398 if (is_store) 4399 { 4400 HOST_WIDE_INT v_hi; 4401 rtx dirty_bits = gen_reg_rtx (TImode); 4402 rtx dirty_off = gen_reg_rtx (SImode); 4403 rtx dirty_128 = gen_reg_rtx (TImode); 4404 rtx neg_block_off = gen_reg_rtx (SImode); 4405 4406 /* Set up mask with one dirty bit per byte of the mem we are 4407 writing, starting from top bit. */ 4408 v_hi = v = -1; 4409 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63; 4410 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64) 4411 { 4412 v_hi = v; 4413 v = 0; 4414 } 4415 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode)); 4416 4417 /* Form index into cache dirty_bits. eq_index is one of 4418 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us 4419 0x40, 0x50, 0x60 or 0x70 which just happens to be the 4420 offset to each of the four dirty_bits elements. */ 4421 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2))); 4422 4423 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off)); 4424 4425 /* Rotate bit mask to proper bit. */ 4426 emit_insn (gen_negsi2 (neg_block_off, block_off)); 4427 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off)); 4428 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off)); 4429 4430 /* Or in the new dirty bits. */ 4431 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128)); 4432 4433 /* Store. */ 4434 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off)); 4435 } 4436 4437 emit_label (cont_label); 4438} 4439 4440static rtx 4441expand_ea_mem (rtx mem, bool is_store) 4442{ 4443 rtx ea_addr; 4444 rtx data_addr = gen_reg_rtx (Pmode); 4445 rtx new_mem; 4446 4447 ea_addr = force_reg (EAmode, XEXP (mem, 0)); 4448 if (optimize_size || optimize == 0) 4449 ea_load_store (mem, is_store, ea_addr, data_addr); 4450 else 4451 ea_load_store_inline (mem, is_store, ea_addr, data_addr); 4452 4453 if (ea_alias_set == -1) 4454 ea_alias_set = new_alias_set (); 4455 4456 /* We generate a new MEM RTX to refer to the copy of the data 4457 in the cache. 
We do not copy memory attributes (except the 4458 alignment) from the original MEM, as they may no longer apply 4459 to the cache copy. */ 4460 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr); 4461 set_mem_alias_set (new_mem, ea_alias_set); 4462 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8)); 4463 4464 return new_mem; 4465} 4466 4467int 4468spu_expand_mov (rtx * ops, machine_mode mode) 4469{ 4470 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0])) 4471 { 4472 /* Perform the move in the destination SUBREG's inner mode. */ 4473 ops[0] = SUBREG_REG (ops[0]); 4474 mode = GET_MODE (ops[0]); 4475 ops[1] = gen_lowpart_common (mode, ops[1]); 4476 gcc_assert (ops[1]); 4477 } 4478 4479 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1])) 4480 { 4481 rtx from = SUBREG_REG (ops[1]); 4482 machine_mode imode = int_mode_for_mode (GET_MODE (from)); 4483 4484 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT 4485 && GET_MODE_CLASS (imode) == MODE_INT 4486 && subreg_lowpart_p (ops[1])); 4487 4488 if (GET_MODE_SIZE (imode) < 4) 4489 imode = SImode; 4490 if (imode != GET_MODE (from)) 4491 from = gen_rtx_SUBREG (imode, from, 0); 4492 4493 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode)) 4494 { 4495 enum insn_code icode = convert_optab_handler (trunc_optab, 4496 mode, imode); 4497 emit_insn (GEN_FCN (icode) (ops[0], from)); 4498 } 4499 else 4500 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1)); 4501 return 1; 4502 } 4503 4504 /* At least one of the operands needs to be a register. */ 4505 if ((reload_in_progress | reload_completed) == 0 4506 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode)) 4507 { 4508 rtx temp = force_reg (mode, ops[1]); 4509 emit_move_insn (ops[0], temp); 4510 return 1; 4511 } 4512 if (reload_in_progress || reload_completed) 4513 { 4514 if (CONSTANT_P (ops[1])) 4515 return spu_split_immediate (ops); 4516 return 0; 4517 } 4518 4519 /* Catch the SImode immediates greater than 0x7fffffff, and sign 4520 extend them. */ 4521 if (GET_CODE (ops[1]) == CONST_INT) 4522 { 4523 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode); 4524 if (val != INTVAL (ops[1])) 4525 { 4526 emit_move_insn (ops[0], GEN_INT (val)); 4527 return 1; 4528 } 4529 } 4530 if (MEM_P (ops[0])) 4531 { 4532 if (MEM_ADDR_SPACE (ops[0])) 4533 ops[0] = expand_ea_mem (ops[0], true); 4534 return spu_split_store (ops); 4535 } 4536 if (MEM_P (ops[1])) 4537 { 4538 if (MEM_ADDR_SPACE (ops[1])) 4539 ops[1] = expand_ea_mem (ops[1], false); 4540 return spu_split_load (ops); 4541 } 4542 4543 return 0; 4544} 4545 4546static void 4547spu_convert_move (rtx dst, rtx src) 4548{ 4549 machine_mode mode = GET_MODE (dst); 4550 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); 4551 rtx reg; 4552 gcc_assert (GET_MODE (src) == TImode); 4553 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst; 4554 emit_insn (gen_rtx_SET (VOIDmode, reg, 4555 gen_rtx_TRUNCATE (int_mode, 4556 gen_rtx_LSHIFTRT (TImode, src, 4557 GEN_INT (int_mode == DImode ? 64 : 96))))); 4558 if (int_mode != mode) 4559 { 4560 reg = simplify_gen_subreg (mode, reg, int_mode, 0); 4561 emit_move_insn (dst, reg); 4562 } 4563} 4564 4565/* Load TImode values into DST0 and DST1 (when it is non-NULL) using 4566 the address from SRC and SRC+16. Return a REG or CONST_INT that 4567 specifies how many bytes to rotate the loaded registers, plus any 4568 extra from EXTRA_ROTQBY. The address and rotate amounts are 4569 normalized to improve merging of loads and rotate computations. 
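For example, a load from an address known to be 6 bytes past a 16-byte boundary becomes a quadword load from the rounded-down address followed by a rotate of 6 bytes (plus EXTRA_ROTQBY) that brings the requested bytes into the preferred slot.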
*/ 4570static rtx 4571spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby) 4572{ 4573 rtx addr = XEXP (src, 0); 4574 rtx p0, p1, rot, addr0, addr1; 4575 int rot_amt; 4576 4577 rot = 0; 4578 rot_amt = 0; 4579 4580 if (MEM_ALIGN (src) >= 128) 4581 /* Address is already aligned; simply perform a TImode load. */ ; 4582 else if (GET_CODE (addr) == PLUS) 4583 { 4584 /* 8 cases: 4585 aligned reg + aligned reg => lqx 4586 aligned reg + unaligned reg => lqx, rotqby 4587 aligned reg + aligned const => lqd 4588 aligned reg + unaligned const => lqd, rotqbyi 4589 unaligned reg + aligned reg => lqx, rotqby 4590 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch) 4591 unaligned reg + aligned const => lqd, rotqby 4592 unaligned reg + unaligned const -> not allowed by legitimate address 4593 */ 4594 p0 = XEXP (addr, 0); 4595 p1 = XEXP (addr, 1); 4596 if (!reg_aligned_for_addr (p0)) 4597 { 4598 if (REG_P (p1) && !reg_aligned_for_addr (p1)) 4599 { 4600 rot = gen_reg_rtx (SImode); 4601 emit_insn (gen_addsi3 (rot, p0, p1)); 4602 } 4603 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) 4604 { 4605 if (INTVAL (p1) > 0 4606 && REG_POINTER (p0) 4607 && INTVAL (p1) * BITS_PER_UNIT 4608 < REGNO_POINTER_ALIGN (REGNO (p0))) 4609 { 4610 rot = gen_reg_rtx (SImode); 4611 emit_insn (gen_addsi3 (rot, p0, p1)); 4612 addr = p0; 4613 } 4614 else 4615 { 4616 rtx x = gen_reg_rtx (SImode); 4617 emit_move_insn (x, p1); 4618 if (!spu_arith_operand (p1, SImode)) 4619 p1 = x; 4620 rot = gen_reg_rtx (SImode); 4621 emit_insn (gen_addsi3 (rot, p0, p1)); 4622 addr = gen_rtx_PLUS (Pmode, p0, x); 4623 } 4624 } 4625 else 4626 rot = p0; 4627 } 4628 else 4629 { 4630 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) 4631 { 4632 rot_amt = INTVAL (p1) & 15; 4633 if (INTVAL (p1) & -16) 4634 { 4635 p1 = GEN_INT (INTVAL (p1) & -16); 4636 addr = gen_rtx_PLUS (SImode, p0, p1); 4637 } 4638 else 4639 addr = p0; 4640 } 4641 else if (REG_P (p1) && !reg_aligned_for_addr (p1)) 4642 rot = p1; 4643 } 4644 } 4645 else if (REG_P (addr)) 4646 { 4647 if (!reg_aligned_for_addr (addr)) 4648 rot = addr; 4649 } 4650 else if (GET_CODE (addr) == CONST) 4651 { 4652 if (GET_CODE (XEXP (addr, 0)) == PLUS 4653 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) 4654 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) 4655 { 4656 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1)); 4657 if (rot_amt & -16) 4658 addr = gen_rtx_CONST (Pmode, 4659 gen_rtx_PLUS (Pmode, 4660 XEXP (XEXP (addr, 0), 0), 4661 GEN_INT (rot_amt & -16))); 4662 else 4663 addr = XEXP (XEXP (addr, 0), 0); 4664 } 4665 else 4666 { 4667 rot = gen_reg_rtx (Pmode); 4668 emit_move_insn (rot, addr); 4669 } 4670 } 4671 else if (GET_CODE (addr) == CONST_INT) 4672 { 4673 rot_amt = INTVAL (addr); 4674 addr = GEN_INT (rot_amt & -16); 4675 } 4676 else if (!ALIGNED_SYMBOL_REF_P (addr)) 4677 { 4678 rot = gen_reg_rtx (Pmode); 4679 emit_move_insn (rot, addr); 4680 } 4681 4682 rot_amt += extra_rotby; 4683 4684 rot_amt &= 15; 4685 4686 if (rot && rot_amt) 4687 { 4688 rtx x = gen_reg_rtx (SImode); 4689 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt))); 4690 rot = x; 4691 rot_amt = 0; 4692 } 4693 if (!rot && rot_amt) 4694 rot = GEN_INT (rot_amt); 4695 4696 addr0 = copy_rtx (addr); 4697 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); 4698 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0))); 4699 4700 if (dst1) 4701 { 4702 addr1 = plus_constant (SImode, copy_rtx (addr), 16); 4703 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16)); 4704 emit_insn (gen__movti (dst1, 
change_address (src, TImode, addr1))); 4705 } 4706 4707 return rot; 4708} 4709 4710int 4711spu_split_load (rtx * ops) 4712{ 4713 machine_mode mode = GET_MODE (ops[0]); 4714 rtx addr, load, rot; 4715 int rot_amt; 4716 4717 if (GET_MODE_SIZE (mode) >= 16) 4718 return 0; 4719 4720 addr = XEXP (ops[1], 0); 4721 gcc_assert (GET_CODE (addr) != AND); 4722 4723 if (!address_needs_split (ops[1])) 4724 { 4725 ops[1] = change_address (ops[1], TImode, addr); 4726 load = gen_reg_rtx (TImode); 4727 emit_insn (gen__movti (load, ops[1])); 4728 spu_convert_move (ops[0], load); 4729 return 1; 4730 } 4731 4732 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0; 4733 4734 load = gen_reg_rtx (TImode); 4735 rot = spu_expand_load (load, 0, ops[1], rot_amt); 4736 4737 if (rot) 4738 emit_insn (gen_rotqby_ti (load, load, rot)); 4739 4740 spu_convert_move (ops[0], load); 4741 return 1; 4742} 4743 4744int 4745spu_split_store (rtx * ops) 4746{ 4747 machine_mode mode = GET_MODE (ops[0]); 4748 rtx reg; 4749 rtx addr, p0, p1, p1_lo, smem; 4750 int aform; 4751 int scalar; 4752 4753 if (GET_MODE_SIZE (mode) >= 16) 4754 return 0; 4755 4756 addr = XEXP (ops[0], 0); 4757 gcc_assert (GET_CODE (addr) != AND); 4758 4759 if (!address_needs_split (ops[0])) 4760 { 4761 reg = gen_reg_rtx (TImode); 4762 emit_insn (gen_spu_convert (reg, ops[1])); 4763 ops[0] = change_address (ops[0], TImode, addr); 4764 emit_move_insn (ops[0], reg); 4765 return 1; 4766 } 4767 4768 if (GET_CODE (addr) == PLUS) 4769 { 4770 /* 8 cases: 4771 aligned reg + aligned reg => lqx, c?x, shuf, stqx 4772 aligned reg + unaligned reg => lqx, c?x, shuf, stqx 4773 aligned reg + aligned const => lqd, c?d, shuf, stqx 4774 aligned reg + unaligned const => lqd, c?d, shuf, stqx 4775 unaligned reg + aligned reg => lqx, c?x, shuf, stqx 4776 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx 4777 unaligned reg + aligned const => lqd, c?d, shuf, stqx 4778 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx 4779 */ 4780 aform = 0; 4781 p0 = XEXP (addr, 0); 4782 p1 = p1_lo = XEXP (addr, 1); 4783 if (REG_P (p0) && GET_CODE (p1) == CONST_INT) 4784 { 4785 p1_lo = GEN_INT (INTVAL (p1) & 15); 4786 if (reg_aligned_for_addr (p0)) 4787 { 4788 p1 = GEN_INT (INTVAL (p1) & -16); 4789 if (p1 == const0_rtx) 4790 addr = p0; 4791 else 4792 addr = gen_rtx_PLUS (SImode, p0, p1); 4793 } 4794 else 4795 { 4796 rtx x = gen_reg_rtx (SImode); 4797 emit_move_insn (x, p1); 4798 addr = gen_rtx_PLUS (SImode, p0, x); 4799 } 4800 } 4801 } 4802 else if (REG_P (addr)) 4803 { 4804 aform = 0; 4805 p0 = addr; 4806 p1 = p1_lo = const0_rtx; 4807 } 4808 else 4809 { 4810 aform = 1; 4811 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); 4812 p1 = 0; /* aform doesn't use p1 */ 4813 p1_lo = addr; 4814 if (ALIGNED_SYMBOL_REF_P (addr)) 4815 p1_lo = const0_rtx; 4816 else if (GET_CODE (addr) == CONST 4817 && GET_CODE (XEXP (addr, 0)) == PLUS 4818 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) 4819 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) 4820 { 4821 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); 4822 if ((v & -16) != 0) 4823 addr = gen_rtx_CONST (Pmode, 4824 gen_rtx_PLUS (Pmode, 4825 XEXP (XEXP (addr, 0), 0), 4826 GEN_INT (v & -16))); 4827 else 4828 addr = XEXP (XEXP (addr, 0), 0); 4829 p1_lo = GEN_INT (v & 15); 4830 } 4831 else if (GET_CODE (addr) == CONST_INT) 4832 { 4833 p1_lo = GEN_INT (INTVAL (addr) & 15); 4834 addr = GEN_INT (INTVAL (addr) & -16); 4835 } 4836 else 4837 { 4838 p1_lo = gen_reg_rtx (SImode); 4839 emit_move_insn (p1_lo, addr); 4840 } 4841 } 4842 4843 gcc_assert 
(aform == 0 || aform == 1); 4844 reg = gen_reg_rtx (TImode); 4845 4846 scalar = store_with_one_insn_p (ops[0]); 4847 if (!scalar) 4848 { 4849 /* We could copy the flags from the ops[0] MEM to mem here, 4850 We don't because we want this load to be optimized away if 4851 possible, and copying the flags will prevent that in certain 4852 cases, e.g. consider the volatile flag. */ 4853 4854 rtx pat = gen_reg_rtx (TImode); 4855 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); 4856 set_mem_alias_set (lmem, 0); 4857 emit_insn (gen_movti (reg, lmem)); 4858 4859 if (!p0 || reg_aligned_for_addr (p0)) 4860 p0 = stack_pointer_rtx; 4861 if (!p1_lo) 4862 p1_lo = const0_rtx; 4863 4864 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode)))); 4865 emit_insn (gen_shufb (reg, ops[1], reg, pat)); 4866 } 4867 else 4868 { 4869 if (GET_CODE (ops[1]) == REG) 4870 emit_insn (gen_spu_convert (reg, ops[1])); 4871 else if (GET_CODE (ops[1]) == SUBREG) 4872 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1]))); 4873 else 4874 abort (); 4875 } 4876 4877 if (GET_MODE_SIZE (mode) < 4 && scalar) 4878 emit_insn (gen_ashlti3 4879 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode)))); 4880 4881 smem = change_address (ops[0], TImode, copy_rtx (addr)); 4882 /* We can't use the previous alias set because the memory has changed 4883 size and can potentially overlap objects of other types. */ 4884 set_mem_alias_set (smem, 0); 4885 4886 emit_insn (gen_movti (smem, reg)); 4887 return 1; 4888} 4889 4890/* Return TRUE if X is MEM which is a struct member reference 4891 and the member can safely be loaded and stored with a single 4892 instruction because it is padded. */ 4893static int 4894mem_is_padded_component_ref (rtx x) 4895{ 4896 tree t = MEM_EXPR (x); 4897 tree r; 4898 if (!t || TREE_CODE (t) != COMPONENT_REF) 4899 return 0; 4900 t = TREE_OPERAND (t, 1); 4901 if (!t || TREE_CODE (t) != FIELD_DECL 4902 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t))) 4903 return 0; 4904 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */ 4905 r = DECL_FIELD_CONTEXT (t); 4906 if (!r || TREE_CODE (r) != RECORD_TYPE) 4907 return 0; 4908 /* Make sure they are the same mode */ 4909 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t))) 4910 return 0; 4911 /* If there are no following fields then the field alignment assures 4912 the structure is padded to the alignment which means this field is 4913 padded too. */ 4914 if (TREE_CHAIN (t) == 0) 4915 return 1; 4916 /* If the following field is also aligned then this field will be 4917 padded. */ 4918 t = TREE_CHAIN (t); 4919 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128) 4920 return 1; 4921 return 0; 4922} 4923 4924/* Parse the -mfixed-range= option string. */ 4925static void 4926fix_range (const char *const_str) 4927{ 4928 int i, first, last; 4929 char *str, *dash, *comma; 4930 4931 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 4932 REG2 are either register names or register numbers. The effect 4933 of this option is to mark the registers in the range from REG1 to 4934 REG2 as ``fixed'' so they won't be used by the compiler. 
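A hypothetical invocation is -mfixed-range=80-82,110-111, which marks registers 80 through 82 and 110 through 111 as fixed; multiple ranges are separated by commas.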
*/ 4935 4936 i = strlen (const_str); 4937 str = (char *) alloca (i + 1); 4938 memcpy (str, const_str, i + 1); 4939 4940 while (1) 4941 { 4942 dash = strchr (str, '-'); 4943 if (!dash) 4944 { 4945 warning (0, "value of -mfixed-range must have form REG1-REG2"); 4946 return; 4947 } 4948 *dash = '\0'; 4949 comma = strchr (dash + 1, ','); 4950 if (comma) 4951 *comma = '\0'; 4952 4953 first = decode_reg_name (str); 4954 if (first < 0) 4955 { 4956 warning (0, "unknown register name: %s", str); 4957 return; 4958 } 4959 4960 last = decode_reg_name (dash + 1); 4961 if (last < 0) 4962 { 4963 warning (0, "unknown register name: %s", dash + 1); 4964 return; 4965 } 4966 4967 *dash = '-'; 4968 4969 if (first > last) 4970 { 4971 warning (0, "%s-%s is an empty range", str, dash + 1); 4972 return; 4973 } 4974 4975 for (i = first; i <= last; ++i) 4976 fixed_regs[i] = call_used_regs[i] = 1; 4977 4978 if (!comma) 4979 break; 4980 4981 *comma = ','; 4982 str = comma + 1; 4983 } 4984} 4985 4986/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that 4987 can be generated using the fsmbi instruction. */ 4988int 4989fsmbi_const_p (rtx x) 4990{ 4991 if (CONSTANT_P (x)) 4992 { 4993 /* We can always choose TImode for CONST_INT because the high bits 4994 of an SImode will always be all 1s, i.e., valid for fsmbi. */ 4995 enum immediate_class c = classify_immediate (x, TImode); 4996 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2); 4997 } 4998 return 0; 4999} 5000 5001/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that 5002 can be generated using the cbd, chd, cwd or cdd instruction. */ 5003int 5004cpat_const_p (rtx x, machine_mode mode) 5005{ 5006 if (CONSTANT_P (x)) 5007 { 5008 enum immediate_class c = classify_immediate (x, mode); 5009 return c == IC_CPAT; 5010 } 5011 return 0; 5012} 5013 5014rtx 5015gen_cpat_const (rtx * ops) 5016{ 5017 unsigned char dst[16]; 5018 int i, offset, shift, isize; 5019 if (GET_CODE (ops[3]) != CONST_INT 5020 || GET_CODE (ops[2]) != CONST_INT 5021 || (GET_CODE (ops[1]) != CONST_INT 5022 && GET_CODE (ops[1]) != REG)) 5023 return 0; 5024 if (GET_CODE (ops[1]) == REG 5025 && (!REG_POINTER (ops[1]) 5026 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128)) 5027 return 0; 5028 5029 for (i = 0; i < 16; i++) 5030 dst[i] = i + 16; 5031 isize = INTVAL (ops[3]); 5032 if (isize == 1) 5033 shift = 3; 5034 else if (isize == 2) 5035 shift = 2; 5036 else 5037 shift = 0; 5038 offset = (INTVAL (ops[2]) + 5039 (GET_CODE (ops[1]) == 5040 CONST_INT ? INTVAL (ops[1]) : 0)) & 15; 5041 for (i = 0; i < isize; i++) 5042 dst[offset + i] = i + shift; 5043 return array_to_constant (TImode, dst); 5044} 5045 5046/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte 5047 array. Use MODE for CONST_INT's. When the constant's mode is smaller 5048 than 16 bytes, the value is repeated across the rest of the array. */ 5049void 5050constant_to_array (machine_mode mode, rtx x, unsigned char arr[16]) 5051{ 5052 HOST_WIDE_INT val; 5053 int i, j, first; 5054 5055 memset (arr, 0, 16); 5056 mode = GET_MODE (x) != VOIDmode ? 
GET_MODE (x) : mode; 5057 if (GET_CODE (x) == CONST_INT 5058 || (GET_CODE (x) == CONST_DOUBLE 5059 && (mode == SFmode || mode == DFmode))) 5060 { 5061 gcc_assert (mode != VOIDmode && mode != BLKmode); 5062 5063 if (GET_CODE (x) == CONST_DOUBLE) 5064 val = const_double_to_hwint (x); 5065 else 5066 val = INTVAL (x); 5067 first = GET_MODE_SIZE (mode) - 1; 5068 for (i = first; i >= 0; i--) 5069 { 5070 arr[i] = val & 0xff; 5071 val >>= 8; 5072 } 5073 /* Splat the constant across the whole array. */ 5074 for (j = 0, i = first + 1; i < 16; i++) 5075 { 5076 arr[i] = arr[j]; 5077 j = (j == first) ? 0 : j + 1; 5078 } 5079 } 5080 else if (GET_CODE (x) == CONST_DOUBLE) 5081 { 5082 val = CONST_DOUBLE_LOW (x); 5083 for (i = 15; i >= 8; i--) 5084 { 5085 arr[i] = val & 0xff; 5086 val >>= 8; 5087 } 5088 val = CONST_DOUBLE_HIGH (x); 5089 for (i = 7; i >= 0; i--) 5090 { 5091 arr[i] = val & 0xff; 5092 val >>= 8; 5093 } 5094 } 5095 else if (GET_CODE (x) == CONST_VECTOR) 5096 { 5097 int units; 5098 rtx elt; 5099 mode = GET_MODE_INNER (mode); 5100 units = CONST_VECTOR_NUNITS (x); 5101 for (i = 0; i < units; i++) 5102 { 5103 elt = CONST_VECTOR_ELT (x, i); 5104 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE) 5105 { 5106 if (GET_CODE (elt) == CONST_DOUBLE) 5107 val = const_double_to_hwint (elt); 5108 else 5109 val = INTVAL (elt); 5110 first = GET_MODE_SIZE (mode) - 1; 5111 if (first + i * GET_MODE_SIZE (mode) > 16) 5112 abort (); 5113 for (j = first; j >= 0; j--) 5114 { 5115 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff; 5116 val >>= 8; 5117 } 5118 } 5119 } 5120 } 5121 else 5122 gcc_unreachable(); 5123} 5124 5125/* Convert a 16 byte array to a constant of mode MODE. When MODE is 5126 smaller than 16 bytes, use the bytes that would represent that value 5127 in a register, e.g., for QImode return the value of arr[3]. */ 5128rtx 5129array_to_constant (machine_mode mode, const unsigned char arr[16]) 5130{ 5131 machine_mode inner_mode; 5132 rtvec v; 5133 int units, size, i, j, k; 5134 HOST_WIDE_INT val; 5135 5136 if (GET_MODE_CLASS (mode) == MODE_INT 5137 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) 5138 { 5139 j = GET_MODE_SIZE (mode); 5140 i = j < 4 ? 
4 - j : 0; 5141 for (val = 0; i < j; i++) 5142 val = (val << 8) | arr[i]; 5143 val = trunc_int_for_mode (val, mode); 5144 return GEN_INT (val); 5145 } 5146 5147 if (mode == TImode) 5148 { 5149 HOST_WIDE_INT high; 5150 for (i = high = 0; i < 8; i++) 5151 high = (high << 8) | arr[i]; 5152 for (i = 8, val = 0; i < 16; i++) 5153 val = (val << 8) | arr[i]; 5154 return immed_double_const (val, high, TImode); 5155 } 5156 if (mode == SFmode) 5157 { 5158 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 5159 val = trunc_int_for_mode (val, SImode); 5160 return hwint_to_const_double (SFmode, val); 5161 } 5162 if (mode == DFmode) 5163 { 5164 for (i = 0, val = 0; i < 8; i++) 5165 val = (val << 8) | arr[i]; 5166 return hwint_to_const_double (DFmode, val); 5167 } 5168 5169 if (!VECTOR_MODE_P (mode)) 5170 abort (); 5171 5172 units = GET_MODE_NUNITS (mode); 5173 size = GET_MODE_UNIT_SIZE (mode); 5174 inner_mode = GET_MODE_INNER (mode); 5175 v = rtvec_alloc (units); 5176 5177 for (k = i = 0; i < units; ++i) 5178 { 5179 val = 0; 5180 for (j = 0; j < size; j++, k++) 5181 val = (val << 8) | arr[k]; 5182 5183 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) 5184 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val); 5185 else 5186 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode)); 5187 } 5188 if (k > 16) 5189 abort (); 5190 5191 return gen_rtx_CONST_VECTOR (mode, v); 5192} 5193 5194static void 5195reloc_diagnostic (rtx x) 5196{ 5197 tree decl = 0; 5198 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC)) 5199 return; 5200 5201 if (GET_CODE (x) == SYMBOL_REF) 5202 decl = SYMBOL_REF_DECL (x); 5203 else if (GET_CODE (x) == CONST 5204 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) 5205 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0)); 5206 5207 /* SYMBOL_REF_DECL is not necessarily a DECL. */ 5208 if (decl && !DECL_P (decl)) 5209 decl = 0; 5210 5211 /* The decl could be a string constant. */ 5212 if (decl && DECL_P (decl)) 5213 { 5214 location_t loc; 5215 /* We use last_assemble_variable_decl to get line information. It's 5216 not always going to be right and might not even be close, but will 5217 be right for the more common cases. */ 5218 if (!last_assemble_variable_decl || in_section == ctors_section) 5219 loc = DECL_SOURCE_LOCATION (decl); 5220 else 5221 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl); 5222 5223 if (TARGET_WARN_RELOC) 5224 warning_at (loc, 0, 5225 "creating run-time relocation for %qD", decl); 5226 else 5227 error_at (loc, 5228 "creating run-time relocation for %qD", decl); 5229 } 5230 else 5231 { 5232 if (TARGET_WARN_RELOC) 5233 warning_at (input_location, 0, "creating run-time relocation"); 5234 else 5235 error_at (input_location, "creating run-time relocation"); 5236 } 5237} 5238 5239/* Hook into assemble_integer so we can generate an error for run-time 5240 relocations. The SPU ABI disallows them. */ 5241static bool 5242spu_assemble_integer (rtx x, unsigned int size, int aligned_p) 5243{ 5244 /* By default run-time relocations aren't supported, but we allow them 5245 in case users support it in their own run-time loader. And we provide 5246 a warning for those users that don't. 
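For instance (a hypothetical case), with -fpic and the warn-reloc option enabled, emitting the address of a global in statically initialized data, e.g. static int *p = &x;, reaches reloc_diagnostic above and produces the warning.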
*/ 5247 if ((GET_CODE (x) == SYMBOL_REF) 5248 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST) 5249 reloc_diagnostic (x); 5250 5251 return default_assemble_integer (x, size, aligned_p); 5252} 5253 5254static void 5255spu_asm_globalize_label (FILE * file, const char *name) 5256{ 5257 fputs ("\t.global\t", file); 5258 assemble_name (file, name); 5259 fputs ("\n", file); 5260} 5261 5262static bool 5263spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, 5264 int opno ATTRIBUTE_UNUSED, int *total, 5265 bool speed ATTRIBUTE_UNUSED) 5266{ 5267 machine_mode mode = GET_MODE (x); 5268 int cost = COSTS_N_INSNS (2); 5269 5270 /* Folding to a CONST_VECTOR will use extra space but there might 5271 be only a small savings in cycles. We'd like to use a CONST_VECTOR 5272 only if it allows us to fold away multiple insns. Changing the cost 5273 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though 5274 because this cost will only be compared against a single insn. 5275 if (code == CONST_VECTOR) 5276 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6); 5277 */ 5278 5279 /* Use defaults for float operations. Not accurate but good enough. */ 5280 if (mode == DFmode) 5281 { 5282 *total = COSTS_N_INSNS (13); 5283 return true; 5284 } 5285 if (mode == SFmode) 5286 { 5287 *total = COSTS_N_INSNS (6); 5288 return true; 5289 } 5290 switch (code) 5291 { 5292 case CONST_INT: 5293 if (satisfies_constraint_K (x)) 5294 *total = 0; 5295 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll) 5296 *total = COSTS_N_INSNS (1); 5297 else 5298 *total = COSTS_N_INSNS (3); 5299 return true; 5300 5301 case CONST: 5302 *total = COSTS_N_INSNS (3); 5303 return true; 5304 5305 case LABEL_REF: 5306 case SYMBOL_REF: 5307 *total = COSTS_N_INSNS (0); 5308 return true; 5309 5310 case CONST_DOUBLE: 5311 *total = COSTS_N_INSNS (5); 5312 return true; 5313 5314 case FLOAT_EXTEND: 5315 case FLOAT_TRUNCATE: 5316 case FLOAT: 5317 case UNSIGNED_FLOAT: 5318 case FIX: 5319 case UNSIGNED_FIX: 5320 *total = COSTS_N_INSNS (7); 5321 return true; 5322 5323 case PLUS: 5324 if (mode == TImode) 5325 { 5326 *total = COSTS_N_INSNS (9); 5327 return true; 5328 } 5329 break; 5330 5331 case MULT: 5332 cost = 5333 GET_CODE (XEXP (x, 0)) == 5334 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7); 5335 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG) 5336 { 5337 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5338 { 5339 HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); 5340 cost = COSTS_N_INSNS (14); 5341 if ((val & 0xffff) == 0) 5342 cost = COSTS_N_INSNS (9); 5343 else if (val > 0 && val < 0x10000) 5344 cost = COSTS_N_INSNS (11); 5345 } 5346 } 5347 *total = cost; 5348 return true; 5349 case DIV: 5350 case UDIV: 5351 case MOD: 5352 case UMOD: 5353 *total = COSTS_N_INSNS (20); 5354 return true; 5355 case ROTATE: 5356 case ROTATERT: 5357 case ASHIFT: 5358 case ASHIFTRT: 5359 case LSHIFTRT: 5360 *total = COSTS_N_INSNS (4); 5361 return true; 5362 case UNSPEC: 5363 if (XINT (x, 1) == UNSPEC_CONVERT) 5364 *total = COSTS_N_INSNS (0); 5365 else 5366 *total = COSTS_N_INSNS (4); 5367 return true; 5368 } 5369 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). 
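For instance, a DImode operation ends up costing four times its SImode counterpart: the size ratio is 2 and the factor below is applied twice.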
*/ 5370 if (GET_MODE_CLASS (mode) == MODE_INT 5371 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl) 5372 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)) 5373 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); 5374 *total = cost; 5375 return true; 5376} 5377 5378static machine_mode 5379spu_unwind_word_mode (void) 5380{ 5381 return SImode; 5382} 5383 5384/* Decide whether we can make a sibling call to a function. DECL is the 5385 declaration of the function being targeted by the call and EXP is the 5386 CALL_EXPR representing the call. */ 5387static bool 5388spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 5389{ 5390 return decl && !TARGET_LARGE_MEM; 5391} 5392 5393/* We need to correctly update the back chain pointer and the Available 5394 Stack Size (which is in the second slot of the sp register.) */ 5395void 5396spu_allocate_stack (rtx op0, rtx op1) 5397{ 5398 HOST_WIDE_INT v; 5399 rtx chain = gen_reg_rtx (V4SImode); 5400 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx); 5401 rtx sp = gen_reg_rtx (V4SImode); 5402 rtx splatted = gen_reg_rtx (V4SImode); 5403 rtx pat = gen_reg_rtx (TImode); 5404 5405 /* copy the back chain so we can save it back again. */ 5406 emit_move_insn (chain, stack_bot); 5407 5408 op1 = force_reg (SImode, op1); 5409 5410 v = 0x1020300010203ll; 5411 emit_move_insn (pat, immed_double_const (v, v, TImode)); 5412 emit_insn (gen_shufb (splatted, op1, op1, pat)); 5413 5414 emit_insn (gen_spu_convert (sp, stack_pointer_rtx)); 5415 emit_insn (gen_subv4si3 (sp, sp, splatted)); 5416 5417 if (flag_stack_check) 5418 { 5419 rtx avail = gen_reg_rtx(SImode); 5420 rtx result = gen_reg_rtx(SImode); 5421 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1))); 5422 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1))); 5423 emit_insn (gen_spu_heq (result, GEN_INT(0) )); 5424 } 5425 5426 emit_insn (gen_spu_convert (stack_pointer_rtx, sp)); 5427 5428 emit_move_insn (stack_bot, chain); 5429 5430 emit_move_insn (op0, virtual_stack_dynamic_rtx); 5431} 5432 5433void 5434spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1) 5435{ 5436 static unsigned char arr[16] = 5437 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; 5438 rtx temp = gen_reg_rtx (SImode); 5439 rtx temp2 = gen_reg_rtx (SImode); 5440 rtx temp3 = gen_reg_rtx (V4SImode); 5441 rtx temp4 = gen_reg_rtx (V4SImode); 5442 rtx pat = gen_reg_rtx (TImode); 5443 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); 5444 5445 /* Restore the backchain from the first word, sp from the second. 
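The shuffle pattern built from arr splats the low word of its input into every slot, so a single 32-bit delta can be added to the whole V4SImode stack pointer register, updating the address and the Available Stack Size slot by the same amount.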
*/ 5446 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0)); 5447 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4)); 5448 5449 emit_move_insn (pat, array_to_constant (TImode, arr)); 5450 5451 /* Compute Available Stack Size for sp */ 5452 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx)); 5453 emit_insn (gen_shufb (temp3, temp, temp, pat)); 5454 5455 /* Compute Available Stack Size for back chain */ 5456 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx)); 5457 emit_insn (gen_shufb (temp4, temp2, temp2, pat)); 5458 emit_insn (gen_addv4si3 (temp4, sp, temp4)); 5459 5460 emit_insn (gen_addv4si3 (sp, sp, temp3)); 5461 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4); 5462} 5463 5464static void 5465spu_init_libfuncs (void) 5466{ 5467 set_optab_libfunc (smul_optab, DImode, "__muldi3"); 5468 set_optab_libfunc (sdiv_optab, DImode, "__divdi3"); 5469 set_optab_libfunc (smod_optab, DImode, "__moddi3"); 5470 set_optab_libfunc (udiv_optab, DImode, "__udivdi3"); 5471 set_optab_libfunc (umod_optab, DImode, "__umoddi3"); 5472 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4"); 5473 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2"); 5474 set_optab_libfunc (clz_optab, DImode, "__clzdi2"); 5475 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2"); 5476 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2"); 5477 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2"); 5478 set_optab_libfunc (parity_optab, DImode, "__paritydi2"); 5479 5480 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf"); 5481 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf"); 5482 5483 set_optab_libfunc (addv_optab, SImode, "__addvsi3"); 5484 set_optab_libfunc (subv_optab, SImode, "__subvsi3"); 5485 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3"); 5486 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3"); 5487 set_optab_libfunc (negv_optab, SImode, "__negvsi2"); 5488 set_optab_libfunc (absv_optab, SImode, "__absvsi2"); 5489 set_optab_libfunc (addv_optab, DImode, "__addvdi3"); 5490 set_optab_libfunc (subv_optab, DImode, "__subvdi3"); 5491 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3"); 5492 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3"); 5493 set_optab_libfunc (negv_optab, DImode, "__negvdi2"); 5494 set_optab_libfunc (absv_optab, DImode, "__absvdi2"); 5495 5496 set_optab_libfunc (smul_optab, TImode, "__multi3"); 5497 set_optab_libfunc (sdiv_optab, TImode, "__divti3"); 5498 set_optab_libfunc (smod_optab, TImode, "__modti3"); 5499 set_optab_libfunc (udiv_optab, TImode, "__udivti3"); 5500 set_optab_libfunc (umod_optab, TImode, "__umodti3"); 5501 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4"); 5502} 5503 5504/* Make a subreg, stripping any existing subreg. We could possibly just 5505 call simplify_subreg, but in this case we know what we want. 
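For example, asking for V16QImode of a V4SImode register simply yields (subreg:V16QI (reg:V4SI ...) 0).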
*/ 5506rtx 5507spu_gen_subreg (machine_mode mode, rtx x) 5508{ 5509 if (GET_CODE (x) == SUBREG) 5510 x = SUBREG_REG (x); 5511 if (GET_MODE (x) == mode) 5512 return x; 5513 return gen_rtx_SUBREG (mode, x, 0); 5514} 5515 5516static bool 5517spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 5518{ 5519 return (TYPE_MODE (type) == BLKmode 5520 && ((type) == 0 5521 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST 5522 || int_size_in_bytes (type) > 5523 (MAX_REGISTER_RETURN * UNITS_PER_WORD))); 5524} 5525 5526/* Create the built-in types and functions */ 5527 5528enum spu_function_code 5529{ 5530#define DEF_BUILTIN(fcode, icode, name, type, params) fcode, 5531#include "spu-builtins.def" 5532#undef DEF_BUILTIN 5533 NUM_SPU_BUILTINS 5534}; 5535 5536extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS]; 5537 5538struct spu_builtin_description spu_builtins[] = { 5539#define DEF_BUILTIN(fcode, icode, name, type, params) \ 5540 {fcode, icode, name, type, params}, 5541#include "spu-builtins.def" 5542#undef DEF_BUILTIN 5543}; 5544 5545static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS]; 5546 5547/* Returns the spu builtin decl for CODE. */ 5548 5549static tree 5550spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 5551{ 5552 if (code >= NUM_SPU_BUILTINS) 5553 return error_mark_node; 5554 5555 return spu_builtin_decls[code]; 5556} 5557 5558 5559static void 5560spu_init_builtins (void) 5561{ 5562 struct spu_builtin_description *d; 5563 unsigned int i; 5564 5565 V16QI_type_node = build_vector_type (intQI_type_node, 16); 5566 V8HI_type_node = build_vector_type (intHI_type_node, 8); 5567 V4SI_type_node = build_vector_type (intSI_type_node, 4); 5568 V2DI_type_node = build_vector_type (intDI_type_node, 2); 5569 V4SF_type_node = build_vector_type (float_type_node, 4); 5570 V2DF_type_node = build_vector_type (double_type_node, 2); 5571 5572 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16); 5573 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8); 5574 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4); 5575 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2); 5576 5577 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node; 5578 5579 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE]; 5580 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE]; 5581 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE]; 5582 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE]; 5583 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE]; 5584 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE]; 5585 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE]; 5586 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE]; 5587 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE]; 5588 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE]; 5589 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE]; 5590 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE]; 5591 5592 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE]; 5593 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE]; 5594 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE]; 5595 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE]; 5596 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE]; 5597 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE]; 5598 
spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE]; 5599 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE]; 5600 5601 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE]; 5602 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE]; 5603 5604 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE]; 5605 5606 spu_builtin_types[SPU_BTI_PTR] = 5607 build_pointer_type (build_qualified_type 5608 (void_type_node, 5609 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); 5610 5611 /* For each builtin we build a new prototype. The tree code will make 5612 sure nodes are shared. */ 5613 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++) 5614 { 5615 tree p; 5616 char name[64]; /* build_function will make a copy. */ 5617 int parm; 5618 5619 if (d->name == 0) 5620 continue; 5621 5622 /* Find last parm. */ 5623 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++) 5624 ; 5625 5626 p = void_list_node; 5627 while (parm > 1) 5628 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p); 5629 5630 p = build_function_type (spu_builtin_types[d->parm[0]], p); 5631 5632 sprintf (name, "__builtin_%s", d->name); 5633 spu_builtin_decls[i] = 5634 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE); 5635 if (d->fcode == SPU_MASK_FOR_LOAD) 5636 TREE_READONLY (spu_builtin_decls[i]) = 1; 5637 5638 /* These builtins don't throw. */ 5639 TREE_NOTHROW (spu_builtin_decls[i]) = 1; 5640 } 5641} 5642 5643void 5644spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1) 5645{ 5646 static unsigned char arr[16] = 5647 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; 5648 5649 rtx temp = gen_reg_rtx (Pmode); 5650 rtx temp2 = gen_reg_rtx (V4SImode); 5651 rtx temp3 = gen_reg_rtx (V4SImode); 5652 rtx pat = gen_reg_rtx (TImode); 5653 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); 5654 5655 emit_move_insn (pat, array_to_constant (TImode, arr)); 5656 5657 /* Restore the sp. */ 5658 emit_move_insn (temp, op1); 5659 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx)); 5660 5661 /* Compute available stack size for sp. 
*/ 5662 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx)); 5663 emit_insn (gen_shufb (temp3, temp, temp, pat)); 5664 5665 emit_insn (gen_addv4si3 (sp, sp, temp3)); 5666 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2); 5667} 5668 5669int 5670spu_safe_dma (HOST_WIDE_INT channel) 5671{ 5672 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27; 5673} 5674 5675void 5676spu_builtin_splats (rtx ops[]) 5677{ 5678 machine_mode mode = GET_MODE (ops[0]); 5679 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE) 5680 { 5681 unsigned char arr[16]; 5682 constant_to_array (GET_MODE_INNER (mode), ops[1], arr); 5683 emit_move_insn (ops[0], array_to_constant (mode, arr)); 5684 } 5685 else 5686 { 5687 rtx reg = gen_reg_rtx (TImode); 5688 rtx shuf; 5689 if (GET_CODE (ops[1]) != REG 5690 && GET_CODE (ops[1]) != SUBREG) 5691 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]); 5692 switch (mode) 5693 { 5694 case V2DImode: 5695 case V2DFmode: 5696 shuf = 5697 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll, 5698 TImode); 5699 break; 5700 case V4SImode: 5701 case V4SFmode: 5702 shuf = 5703 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll, 5704 TImode); 5705 break; 5706 case V8HImode: 5707 shuf = 5708 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll, 5709 TImode); 5710 break; 5711 case V16QImode: 5712 shuf = 5713 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll, 5714 TImode); 5715 break; 5716 default: 5717 abort (); 5718 } 5719 emit_move_insn (reg, shuf); 5720 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg)); 5721 } 5722} 5723 5724void 5725spu_builtin_extract (rtx ops[]) 5726{ 5727 machine_mode mode; 5728 rtx rot, from, tmp; 5729 5730 mode = GET_MODE (ops[1]); 5731 5732 if (GET_CODE (ops[2]) == CONST_INT) 5733 { 5734 switch (mode) 5735 { 5736 case V16QImode: 5737 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2])); 5738 break; 5739 case V8HImode: 5740 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2])); 5741 break; 5742 case V4SFmode: 5743 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2])); 5744 break; 5745 case V4SImode: 5746 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2])); 5747 break; 5748 case V2DImode: 5749 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2])); 5750 break; 5751 case V2DFmode: 5752 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2])); 5753 break; 5754 default: 5755 abort (); 5756 } 5757 return; 5758 } 5759 5760 from = spu_gen_subreg (TImode, ops[1]); 5761 rot = gen_reg_rtx (TImode); 5762 tmp = gen_reg_rtx (SImode); 5763 5764 switch (mode) 5765 { 5766 case V16QImode: 5767 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3))); 5768 break; 5769 case V8HImode: 5770 emit_insn (gen_addsi3 (tmp, ops[2], ops[2])); 5771 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2))); 5772 break; 5773 case V4SFmode: 5774 case V4SImode: 5775 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2))); 5776 break; 5777 case V2DImode: 5778 case V2DFmode: 5779 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3))); 5780 break; 5781 default: 5782 abort (); 5783 } 5784 emit_insn (gen_rotqby_ti (rot, from, tmp)); 5785 5786 emit_insn (gen_spu_convert (ops[0], rot)); 5787} 5788 5789void 5790spu_builtin_insert (rtx ops[]) 5791{ 5792 machine_mode mode = GET_MODE (ops[0]); 5793 machine_mode imode = GET_MODE_INNER (mode); 5794 rtx mask = gen_reg_rtx (TImode); 5795 rtx offset; 5796 5797 if (GET_CODE (ops[3]) == CONST_INT) 5798 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode)); 5799 else 5800 { 5801 
offset = gen_reg_rtx (SImode); 5802 emit_insn (gen_mulsi3 5803 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode)))); 5804 } 5805 emit_insn (gen_cpat 5806 (mask, stack_pointer_rtx, offset, 5807 GEN_INT (GET_MODE_SIZE (imode)))); 5808 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask)); 5809} 5810 5811void 5812spu_builtin_promote (rtx ops[]) 5813{ 5814 machine_mode mode, imode; 5815 rtx rot, from, offset; 5816 HOST_WIDE_INT pos; 5817 5818 mode = GET_MODE (ops[0]); 5819 imode = GET_MODE_INNER (mode); 5820 5821 from = gen_reg_rtx (TImode); 5822 rot = spu_gen_subreg (TImode, ops[0]); 5823 5824 emit_insn (gen_spu_convert (from, ops[1])); 5825 5826 if (GET_CODE (ops[2]) == CONST_INT) 5827 { 5828 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]); 5829 if (GET_MODE_SIZE (imode) < 4) 5830 pos += 4 - GET_MODE_SIZE (imode); 5831 offset = GEN_INT (pos & 15); 5832 } 5833 else 5834 { 5835 offset = gen_reg_rtx (SImode); 5836 switch (mode) 5837 { 5838 case V16QImode: 5839 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2])); 5840 break; 5841 case V8HImode: 5842 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2])); 5843 emit_insn (gen_addsi3 (offset, offset, offset)); 5844 break; 5845 case V4SFmode: 5846 case V4SImode: 5847 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2])); 5848 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2))); 5849 break; 5850 case V2DImode: 5851 case V2DFmode: 5852 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3))); 5853 break; 5854 default: 5855 abort (); 5856 } 5857 } 5858 emit_insn (gen_rotqby_ti (rot, from, offset)); 5859} 5860 5861static void 5862spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 5863{ 5864 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 5865 rtx shuf = gen_reg_rtx (V4SImode); 5866 rtx insn = gen_reg_rtx (V4SImode); 5867 rtx shufc; 5868 rtx insnc; 5869 rtx mem; 5870 5871 fnaddr = force_reg (SImode, fnaddr); 5872 cxt = force_reg (SImode, cxt); 5873 5874 if (TARGET_LARGE_MEM) 5875 { 5876 rtx rotl = gen_reg_rtx (V4SImode); 5877 rtx mask = gen_reg_rtx (V4SImode); 5878 rtx bi = gen_reg_rtx (SImode); 5879 static unsigned char const shufa[16] = { 5880 2, 3, 0, 1, 18, 19, 16, 17, 5881 0, 1, 2, 3, 16, 17, 18, 19 5882 }; 5883 static unsigned char const insna[16] = { 5884 0x41, 0, 0, 79, 5885 0x41, 0, 0, STATIC_CHAIN_REGNUM, 5886 0x60, 0x80, 0, 79, 5887 0x60, 0x80, 0, STATIC_CHAIN_REGNUM 5888 }; 5889 5890 shufc = force_reg (TImode, array_to_constant (TImode, shufa)); 5891 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna)); 5892 5893 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc)); 5894 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7))); 5895 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7))); 5896 emit_insn (gen_selb (insn, insnc, rotl, mask)); 5897 5898 mem = adjust_address (m_tramp, V4SImode, 0); 5899 emit_move_insn (mem, insn); 5900 5901 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7))); 5902 mem = adjust_address (m_tramp, Pmode, 16); 5903 emit_move_insn (mem, bi); 5904 } 5905 else 5906 { 5907 rtx scxt = gen_reg_rtx (SImode); 5908 rtx sfnaddr = gen_reg_rtx (SImode); 5909 static unsigned char const insna[16] = { 5910 0x42, 0, 0, STATIC_CHAIN_REGNUM, 5911 0x30, 0, 0, 0, 5912 0, 0, 0, 0, 5913 0, 0, 0, 0 5914 }; 5915 5916 shufc = gen_reg_rtx (TImode); 5917 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna)); 5918 5919 /* By or'ing all of cxt with the ila opcode we are assuming cxt 5920 fits 18 bits and the last 4 are zeros. 
This will be true if 5921 the stack pointer is initialized to 0x3fff0 at program start, 5922 otherwise the ila instruction will be garbage. */ 5923 5924 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7))); 5925 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5))); 5926 emit_insn (gen_cpat 5927 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4))); 5928 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc)); 5929 emit_insn (gen_iorv4si3 (insn, insnc, shuf)); 5930 5931 mem = adjust_address (m_tramp, V4SImode, 0); 5932 emit_move_insn (mem, insn); 5933 } 5934 emit_insn (gen_sync ()); 5935} 5936 5937static bool 5938spu_warn_func_return (tree decl) 5939{ 5940 /* Naked functions are implemented entirely in assembly, including the 5941 return sequence, so suppress warnings about this. */ 5942 return !spu_naked_function_p (decl); 5943} 5944 5945void 5946spu_expand_sign_extend (rtx ops[]) 5947{ 5948 unsigned char arr[16]; 5949 rtx pat = gen_reg_rtx (TImode); 5950 rtx sign, c; 5951 int i, last; 5952 last = GET_MODE (ops[0]) == DImode ? 7 : 15; 5953 if (GET_MODE (ops[1]) == QImode) 5954 { 5955 sign = gen_reg_rtx (HImode); 5956 emit_insn (gen_extendqihi2 (sign, ops[1])); 5957 for (i = 0; i < 16; i++) 5958 arr[i] = 0x12; 5959 arr[last] = 0x13; 5960 } 5961 else 5962 { 5963 for (i = 0; i < 16; i++) 5964 arr[i] = 0x10; 5965 switch (GET_MODE (ops[1])) 5966 { 5967 case HImode: 5968 sign = gen_reg_rtx (SImode); 5969 emit_insn (gen_extendhisi2 (sign, ops[1])); 5970 arr[last] = 0x03; 5971 arr[last - 1] = 0x02; 5972 break; 5973 case SImode: 5974 sign = gen_reg_rtx (SImode); 5975 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31))); 5976 for (i = 0; i < 4; i++) 5977 arr[last - i] = 3 - i; 5978 break; 5979 case DImode: 5980 sign = gen_reg_rtx (SImode); 5981 c = gen_reg_rtx (SImode); 5982 emit_insn (gen_spu_convert (c, ops[1])); 5983 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31))); 5984 for (i = 0; i < 8; i++) 5985 arr[last - i] = 7 - i; 5986 break; 5987 default: 5988 abort (); 5989 } 5990 } 5991 emit_move_insn (pat, array_to_constant (TImode, arr)); 5992 emit_insn (gen_shufb (ops[0], ops[1], sign, pat)); 5993} 5994 5995/* expand vector initialization. If there are any constant parts, 5996 load constant parts first. Then load any non-constant parts. 
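   Non-constant elements are then inserted one at a time with
   spu_builtin_insert.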
*/ 5997void 5998spu_expand_vector_init (rtx target, rtx vals) 5999{ 6000 machine_mode mode = GET_MODE (target); 6001 int n_elts = GET_MODE_NUNITS (mode); 6002 int n_var = 0; 6003 bool all_same = true; 6004 rtx first, x = NULL_RTX, first_constant = NULL_RTX; 6005 int i; 6006 6007 first = XVECEXP (vals, 0, 0); 6008 for (i = 0; i < n_elts; ++i) 6009 { 6010 x = XVECEXP (vals, 0, i); 6011 if (!(CONST_INT_P (x) 6012 || GET_CODE (x) == CONST_DOUBLE 6013 || GET_CODE (x) == CONST_FIXED)) 6014 ++n_var; 6015 else 6016 { 6017 if (first_constant == NULL_RTX) 6018 first_constant = x; 6019 } 6020 if (i > 0 && !rtx_equal_p (x, first)) 6021 all_same = false; 6022 } 6023 6024 /* if all elements are the same, use splats to repeat elements */ 6025 if (all_same) 6026 { 6027 if (!CONSTANT_P (first) 6028 && !register_operand (first, GET_MODE (x))) 6029 first = force_reg (GET_MODE (first), first); 6030 emit_insn (gen_spu_splats (target, first)); 6031 return; 6032 } 6033 6034 /* load constant parts */ 6035 if (n_var != n_elts) 6036 { 6037 if (n_var == 0) 6038 { 6039 emit_move_insn (target, 6040 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 6041 } 6042 else 6043 { 6044 rtx constant_parts_rtx = copy_rtx (vals); 6045 6046 gcc_assert (first_constant != NULL_RTX); 6047 /* fill empty slots with the first constant, this increases 6048 our chance of using splats in the recursive call below. */ 6049 for (i = 0; i < n_elts; ++i) 6050 { 6051 x = XVECEXP (constant_parts_rtx, 0, i); 6052 if (!(CONST_INT_P (x) 6053 || GET_CODE (x) == CONST_DOUBLE 6054 || GET_CODE (x) == CONST_FIXED)) 6055 XVECEXP (constant_parts_rtx, 0, i) = first_constant; 6056 } 6057 6058 spu_expand_vector_init (target, constant_parts_rtx); 6059 } 6060 } 6061 6062 /* load variable parts */ 6063 if (n_var != 0) 6064 { 6065 rtx insert_operands[4]; 6066 6067 insert_operands[0] = target; 6068 insert_operands[2] = target; 6069 for (i = 0; i < n_elts; ++i) 6070 { 6071 x = XVECEXP (vals, 0, i); 6072 if (!(CONST_INT_P (x) 6073 || GET_CODE (x) == CONST_DOUBLE 6074 || GET_CODE (x) == CONST_FIXED)) 6075 { 6076 if (!register_operand (x, GET_MODE (x))) 6077 x = force_reg (GET_MODE (x), x); 6078 insert_operands[1] = x; 6079 insert_operands[3] = GEN_INT (i); 6080 spu_builtin_insert (insert_operands); 6081 } 6082 } 6083 } 6084} 6085 6086/* Return insn index for the vector compare instruction for given CODE, 6087 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. 
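   Only EQ, GT and GTU map directly to hardware compare patterns; the
   other comparison codes are synthesized from these in
   spu_emit_vector_compare.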
*/ 6088 6089static int 6090get_vec_cmp_insn (enum rtx_code code, 6091 machine_mode dest_mode, 6092 machine_mode op_mode) 6093 6094{ 6095 switch (code) 6096 { 6097 case EQ: 6098 if (dest_mode == V16QImode && op_mode == V16QImode) 6099 return CODE_FOR_ceq_v16qi; 6100 if (dest_mode == V8HImode && op_mode == V8HImode) 6101 return CODE_FOR_ceq_v8hi; 6102 if (dest_mode == V4SImode && op_mode == V4SImode) 6103 return CODE_FOR_ceq_v4si; 6104 if (dest_mode == V4SImode && op_mode == V4SFmode) 6105 return CODE_FOR_ceq_v4sf; 6106 if (dest_mode == V2DImode && op_mode == V2DFmode) 6107 return CODE_FOR_ceq_v2df; 6108 break; 6109 case GT: 6110 if (dest_mode == V16QImode && op_mode == V16QImode) 6111 return CODE_FOR_cgt_v16qi; 6112 if (dest_mode == V8HImode && op_mode == V8HImode) 6113 return CODE_FOR_cgt_v8hi; 6114 if (dest_mode == V4SImode && op_mode == V4SImode) 6115 return CODE_FOR_cgt_v4si; 6116 if (dest_mode == V4SImode && op_mode == V4SFmode) 6117 return CODE_FOR_cgt_v4sf; 6118 if (dest_mode == V2DImode && op_mode == V2DFmode) 6119 return CODE_FOR_cgt_v2df; 6120 break; 6121 case GTU: 6122 if (dest_mode == V16QImode && op_mode == V16QImode) 6123 return CODE_FOR_clgt_v16qi; 6124 if (dest_mode == V8HImode && op_mode == V8HImode) 6125 return CODE_FOR_clgt_v8hi; 6126 if (dest_mode == V4SImode && op_mode == V4SImode) 6127 return CODE_FOR_clgt_v4si; 6128 break; 6129 default: 6130 break; 6131 } 6132 return -1; 6133} 6134 6135/* Emit vector compare for operands OP0 and OP1 using code RCODE. 6136 DMODE is expected destination mode. This is a recursive function. */ 6137 6138static rtx 6139spu_emit_vector_compare (enum rtx_code rcode, 6140 rtx op0, rtx op1, 6141 machine_mode dmode) 6142{ 6143 int vec_cmp_insn; 6144 rtx mask; 6145 machine_mode dest_mode; 6146 machine_mode op_mode = GET_MODE (op1); 6147 6148 gcc_assert (GET_MODE (op0) == GET_MODE (op1)); 6149 6150 /* Floating point vector compare instructions uses destination V4SImode. 6151 Double floating point vector compare instructions uses destination V2DImode. 6152 Move destination to appropriate mode later. */ 6153 if (dmode == V4SFmode) 6154 dest_mode = V4SImode; 6155 else if (dmode == V2DFmode) 6156 dest_mode = V2DImode; 6157 else 6158 dest_mode = dmode; 6159 6160 mask = gen_reg_rtx (dest_mode); 6161 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); 6162 6163 if (vec_cmp_insn == -1) 6164 { 6165 bool swap_operands = false; 6166 bool try_again = false; 6167 switch (rcode) 6168 { 6169 case LT: 6170 rcode = GT; 6171 swap_operands = true; 6172 try_again = true; 6173 break; 6174 case LTU: 6175 rcode = GTU; 6176 swap_operands = true; 6177 try_again = true; 6178 break; 6179 case NE: 6180 case UNEQ: 6181 case UNLE: 6182 case UNLT: 6183 case UNGE: 6184 case UNGT: 6185 case UNORDERED: 6186 /* Treat A != B as ~(A==B). 
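	     The remaining unordered codes are handled the same way: the
	     reversed comparison is emitted and its result inverted.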
*/ 6187 { 6188 enum rtx_code rev_code; 6189 enum insn_code nor_code; 6190 rtx rev_mask; 6191 6192 rev_code = reverse_condition_maybe_unordered (rcode); 6193 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode); 6194 6195 nor_code = optab_handler (one_cmpl_optab, dest_mode); 6196 gcc_assert (nor_code != CODE_FOR_nothing); 6197 emit_insn (GEN_FCN (nor_code) (mask, rev_mask)); 6198 if (dmode != dest_mode) 6199 { 6200 rtx temp = gen_reg_rtx (dest_mode); 6201 convert_move (temp, mask, 0); 6202 return temp; 6203 } 6204 return mask; 6205 } 6206 break; 6207 case GE: 6208 case GEU: 6209 case LE: 6210 case LEU: 6211 /* Try GT/GTU/LT/LTU OR EQ */ 6212 { 6213 rtx c_rtx, eq_rtx; 6214 enum insn_code ior_code; 6215 enum rtx_code new_code; 6216 6217 switch (rcode) 6218 { 6219 case GE: new_code = GT; break; 6220 case GEU: new_code = GTU; break; 6221 case LE: new_code = LT; break; 6222 case LEU: new_code = LTU; break; 6223 default: 6224 gcc_unreachable (); 6225 } 6226 6227 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode); 6228 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode); 6229 6230 ior_code = optab_handler (ior_optab, dest_mode); 6231 gcc_assert (ior_code != CODE_FOR_nothing); 6232 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); 6233 if (dmode != dest_mode) 6234 { 6235 rtx temp = gen_reg_rtx (dest_mode); 6236 convert_move (temp, mask, 0); 6237 return temp; 6238 } 6239 return mask; 6240 } 6241 break; 6242 case LTGT: 6243 /* Try LT OR GT */ 6244 { 6245 rtx lt_rtx, gt_rtx; 6246 enum insn_code ior_code; 6247 6248 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode); 6249 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode); 6250 6251 ior_code = optab_handler (ior_optab, dest_mode); 6252 gcc_assert (ior_code != CODE_FOR_nothing); 6253 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx)); 6254 if (dmode != dest_mode) 6255 { 6256 rtx temp = gen_reg_rtx (dest_mode); 6257 convert_move (temp, mask, 0); 6258 return temp; 6259 } 6260 return mask; 6261 } 6262 break; 6263 case ORDERED: 6264 /* Implement as (A==A) & (B==B) */ 6265 { 6266 rtx a_rtx, b_rtx; 6267 enum insn_code and_code; 6268 6269 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode); 6270 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode); 6271 6272 and_code = optab_handler (and_optab, dest_mode); 6273 gcc_assert (and_code != CODE_FOR_nothing); 6274 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx)); 6275 if (dmode != dest_mode) 6276 { 6277 rtx temp = gen_reg_rtx (dest_mode); 6278 convert_move (temp, mask, 0); 6279 return temp; 6280 } 6281 return mask; 6282 } 6283 break; 6284 default: 6285 gcc_unreachable (); 6286 } 6287 6288 /* You only get two chances. */ 6289 if (try_again) 6290 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); 6291 6292 gcc_assert (vec_cmp_insn != -1); 6293 6294 if (swap_operands) 6295 { 6296 rtx tmp; 6297 tmp = op0; 6298 op0 = op1; 6299 op1 = tmp; 6300 } 6301 } 6302 6303 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1)); 6304 if (dmode != dest_mode) 6305 { 6306 rtx temp = gen_reg_rtx (dest_mode); 6307 convert_move (temp, mask, 0); 6308 return temp; 6309 } 6310 return mask; 6311} 6312 6313 6314/* Emit vector conditional expression. 6315 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. 6316 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. 
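   The comparison produces a mask which then selects between OP1 and OP2
   with a selb instruction.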
*/ 6317 6318int 6319spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, 6320 rtx cond, rtx cc_op0, rtx cc_op1) 6321{ 6322 machine_mode dest_mode = GET_MODE (dest); 6323 enum rtx_code rcode = GET_CODE (cond); 6324 rtx mask; 6325 6326 /* Get the vector mask for the given relational operations. */ 6327 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode); 6328 6329 emit_insn(gen_selb (dest, op2, op1, mask)); 6330 6331 return 1; 6332} 6333 6334static rtx 6335spu_force_reg (machine_mode mode, rtx op) 6336{ 6337 rtx x, r; 6338 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode) 6339 { 6340 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT) 6341 || GET_MODE (op) == BLKmode) 6342 return force_reg (mode, convert_to_mode (mode, op, 0)); 6343 abort (); 6344 } 6345 6346 r = force_reg (GET_MODE (op), op); 6347 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode)) 6348 { 6349 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0); 6350 if (x) 6351 return x; 6352 } 6353 6354 x = gen_reg_rtx (mode); 6355 emit_insn (gen_spu_convert (x, r)); 6356 return x; 6357} 6358 6359static void 6360spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p) 6361{ 6362 HOST_WIDE_INT v = 0; 6363 int lsbits; 6364 /* Check the range of immediate operands. */ 6365 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18) 6366 { 6367 int range = p - SPU_BTI_7; 6368 6369 if (!CONSTANT_P (op)) 6370 error ("%s expects an integer literal in the range [%d, %d]", 6371 d->name, 6372 spu_builtin_range[range].low, spu_builtin_range[range].high); 6373 6374 if (GET_CODE (op) == CONST 6375 && (GET_CODE (XEXP (op, 0)) == PLUS 6376 || GET_CODE (XEXP (op, 0)) == MINUS)) 6377 { 6378 v = INTVAL (XEXP (XEXP (op, 0), 1)); 6379 op = XEXP (XEXP (op, 0), 0); 6380 } 6381 else if (GET_CODE (op) == CONST_INT) 6382 v = INTVAL (op); 6383 else if (GET_CODE (op) == CONST_VECTOR 6384 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT) 6385 v = INTVAL (CONST_VECTOR_ELT (op, 0)); 6386 6387 /* The default for v is 0 which is valid in every range. */ 6388 if (v < spu_builtin_range[range].low 6389 || v > spu_builtin_range[range].high) 6390 error ("%s expects an integer literal in the range [%d, %d]. (%wd)", 6391 d->name, 6392 spu_builtin_range[range].low, spu_builtin_range[range].high, 6393 v); 6394 6395 switch (p) 6396 { 6397 case SPU_BTI_S10_4: 6398 lsbits = 4; 6399 break; 6400 case SPU_BTI_U16_2: 6401 /* This is only used in lqa, and stqa. Even though the insns 6402 encode 16 bits of the address (all but the 2 least 6403 significant), only 14 bits are used because it is masked to 6404 be 16 byte aligned. */ 6405 lsbits = 4; 6406 break; 6407 case SPU_BTI_S16_2: 6408 /* This is used for lqr and stqr. */ 6409 lsbits = 2; 6410 break; 6411 default: 6412 lsbits = 0; 6413 } 6414 6415 if (GET_CODE (op) == LABEL_REF 6416 || (GET_CODE (op) == SYMBOL_REF 6417 && SYMBOL_REF_FUNCTION_P (op)) 6418 || (v & ((1 << lsbits) - 1)) != 0) 6419 warning (0, "%d least significant bits of %s are ignored", lsbits, 6420 d->name); 6421 } 6422} 6423 6424 6425static int 6426expand_builtin_args (struct spu_builtin_description *d, tree exp, 6427 rtx target, rtx ops[]) 6428{ 6429 enum insn_code icode = (enum insn_code) d->icode; 6430 int i = 0, a; 6431 6432 /* Expand the arguments into rtl. 
 */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
	abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
    }

  gcc_assert (i == insn_data[icode].n_generator_args);
  return i;
}

static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = (enum insn_code) d->icode;
  machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from the argument list.  */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand, which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand; otherwise
	 use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use TARGET, because not using it can lead to extra copies,
	 and when all registers are in use extra copies lead to extra
	 spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* Get the address.  */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* Negate the address.  */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
			      gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* MODE can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
	 si_ai(), we sometimes need to convert the scalar argument to a
	 vector argument by splatting the scalar.
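	 A CONST_INT argument is turned into a constant vector with
	 spu_const; anything else is converted to the vector's element
	 mode and splatted with spu_splats.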
*/ 6532 if (VECTOR_MODE_P (mode) 6533 && (GET_CODE (ops[i]) == CONST_INT 6534 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT 6535 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT)) 6536 { 6537 if (GET_CODE (ops[i]) == CONST_INT) 6538 ops[i] = spu_const (mode, INTVAL (ops[i])); 6539 else 6540 { 6541 rtx reg = gen_reg_rtx (mode); 6542 machine_mode imode = GET_MODE_INNER (mode); 6543 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i]))) 6544 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]); 6545 if (imode != GET_MODE (ops[i])) 6546 ops[i] = convert_to_mode (imode, ops[i], 6547 TYPE_UNSIGNED (spu_builtin_types 6548 [d->parm[i]])); 6549 emit_insn (gen_spu_splats (reg, ops[i])); 6550 ops[i] = reg; 6551 } 6552 } 6553 6554 spu_check_builtin_parm (d, ops[i], d->parm[p]); 6555 6556 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode)) 6557 ops[i] = spu_force_reg (mode, ops[i]); 6558 } 6559 6560 switch (n_operands) 6561 { 6562 case 0: 6563 pat = GEN_FCN (icode) (0); 6564 break; 6565 case 1: 6566 pat = GEN_FCN (icode) (ops[0]); 6567 break; 6568 case 2: 6569 pat = GEN_FCN (icode) (ops[0], ops[1]); 6570 break; 6571 case 3: 6572 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]); 6573 break; 6574 case 4: 6575 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]); 6576 break; 6577 case 5: 6578 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]); 6579 break; 6580 case 6: 6581 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]); 6582 break; 6583 default: 6584 abort (); 6585 } 6586 6587 if (!pat) 6588 abort (); 6589 6590 if (d->type == B_CALL || d->type == B_BISLED) 6591 emit_call_insn (pat); 6592 else if (d->type == B_JUMP) 6593 { 6594 emit_jump_insn (pat); 6595 emit_barrier (); 6596 } 6597 else 6598 emit_insn (pat); 6599 6600 return_type = spu_builtin_types[d->parm[0]]; 6601 if (d->parm[0] != SPU_BTI_VOID 6602 && GET_MODE (target) != TYPE_MODE (return_type)) 6603 { 6604 /* target is the return value. It should always be the mode of 6605 the builtin function prototype. */ 6606 target = spu_force_reg (TYPE_MODE (return_type), target); 6607 } 6608 6609 return target; 6610} 6611 6612rtx 6613spu_expand_builtin (tree exp, 6614 rtx target, 6615 rtx subtarget ATTRIBUTE_UNUSED, 6616 machine_mode mode ATTRIBUTE_UNUSED, 6617 int ignore ATTRIBUTE_UNUSED) 6618{ 6619 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 6620 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 6621 struct spu_builtin_description *d; 6622 6623 if (fcode < NUM_SPU_BUILTINS) 6624 { 6625 d = &spu_builtins[fcode]; 6626 6627 return spu_expand_builtin_1 (d, exp, target); 6628 } 6629 abort (); 6630} 6631 6632/* Implement targetm.vectorize.builtin_mask_for_load. */ 6633static tree 6634spu_builtin_mask_for_load (void) 6635{ 6636 return spu_builtin_decls[SPU_MASK_FOR_LOAD]; 6637} 6638 6639/* Implement targetm.vectorize.builtin_vectorization_cost. */ 6640static int 6641spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, 6642 tree vectype, 6643 int misalign ATTRIBUTE_UNUSED) 6644{ 6645 unsigned elements; 6646 6647 switch (type_of_cost) 6648 { 6649 case scalar_stmt: 6650 case vector_stmt: 6651 case vector_load: 6652 case vector_store: 6653 case vec_to_scalar: 6654 case scalar_to_vec: 6655 case cond_branch_not_taken: 6656 case vec_perm: 6657 case vec_promote_demote: 6658 return 1; 6659 6660 case scalar_store: 6661 return 10; 6662 6663 case scalar_load: 6664 /* Load + rotate. 
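	 A scalar load is a quadword load plus a rotate to move the value
	 into the preferred slot, hence the cost of 2.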
*/ 6665 return 2; 6666 6667 case unaligned_load: 6668 return 2; 6669 6670 case cond_branch_taken: 6671 return 6; 6672 6673 case vec_construct: 6674 elements = TYPE_VECTOR_SUBPARTS (vectype); 6675 return elements / 2 + 1; 6676 6677 default: 6678 gcc_unreachable (); 6679 } 6680} 6681 6682/* Implement targetm.vectorize.init_cost. */ 6683 6684static void * 6685spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED) 6686{ 6687 unsigned *cost = XNEWVEC (unsigned, 3); 6688 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0; 6689 return cost; 6690} 6691 6692/* Implement targetm.vectorize.add_stmt_cost. */ 6693 6694static unsigned 6695spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, 6696 struct _stmt_vec_info *stmt_info, int misalign, 6697 enum vect_cost_model_location where) 6698{ 6699 unsigned *cost = (unsigned *) data; 6700 unsigned retval = 0; 6701 6702 if (flag_vect_cost_model) 6703 { 6704 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; 6705 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign); 6706 6707 /* Statements in an inner loop relative to the loop being 6708 vectorized are weighted more heavily. The value here is 6709 arbitrary and could potentially be improved with analysis. */ 6710 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) 6711 count *= 50; /* FIXME. */ 6712 6713 retval = (unsigned) (count * stmt_cost); 6714 cost[where] += retval; 6715 } 6716 6717 return retval; 6718} 6719 6720/* Implement targetm.vectorize.finish_cost. */ 6721 6722static void 6723spu_finish_cost (void *data, unsigned *prologue_cost, 6724 unsigned *body_cost, unsigned *epilogue_cost) 6725{ 6726 unsigned *cost = (unsigned *) data; 6727 *prologue_cost = cost[vect_prologue]; 6728 *body_cost = cost[vect_body]; 6729 *epilogue_cost = cost[vect_epilogue]; 6730} 6731 6732/* Implement targetm.vectorize.destroy_cost_data. */ 6733 6734static void 6735spu_destroy_cost_data (void *data) 6736{ 6737 free (data); 6738} 6739 6740/* Return true iff, data reference of TYPE can reach vector alignment (16) 6741 after applying N number of iterations. This routine does not determine 6742 how may iterations are required to reach desired alignment. */ 6743 6744static bool 6745spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed) 6746{ 6747 if (is_packed) 6748 return false; 6749 6750 /* All other types are naturally aligned. */ 6751 return true; 6752} 6753 6754/* Return the appropriate mode for a named address pointer. */ 6755static machine_mode 6756spu_addr_space_pointer_mode (addr_space_t addrspace) 6757{ 6758 switch (addrspace) 6759 { 6760 case ADDR_SPACE_GENERIC: 6761 return ptr_mode; 6762 case ADDR_SPACE_EA: 6763 return EAmode; 6764 default: 6765 gcc_unreachable (); 6766 } 6767} 6768 6769/* Return the appropriate mode for a named address address. */ 6770static machine_mode 6771spu_addr_space_address_mode (addr_space_t addrspace) 6772{ 6773 switch (addrspace) 6774 { 6775 case ADDR_SPACE_GENERIC: 6776 return Pmode; 6777 case ADDR_SPACE_EA: 6778 return EAmode; 6779 default: 6780 gcc_unreachable (); 6781 } 6782} 6783 6784/* Determine if one named address space is a subset of another. 
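   On SPU the generic (local store) address space is a subset of __ea
   unless -mno-address-space-conversion is given, in which case the two
   spaces are treated as disjoint.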
*/ 6785 6786static bool 6787spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset) 6788{ 6789 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA); 6790 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA); 6791 6792 if (subset == superset) 6793 return true; 6794 6795 /* If we have -mno-address-space-conversion, treat __ea and generic as not 6796 being subsets but instead as disjoint address spaces. */ 6797 else if (!TARGET_ADDRESS_SPACE_CONVERSION) 6798 return false; 6799 6800 else 6801 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA); 6802} 6803 6804/* Convert from one address space to another. */ 6805static rtx 6806spu_addr_space_convert (rtx op, tree from_type, tree to_type) 6807{ 6808 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type)); 6809 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type)); 6810 6811 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA); 6812 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA); 6813 6814 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA) 6815 { 6816 rtx result, ls; 6817 6818 ls = gen_const_mem (DImode, 6819 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store")); 6820 set_mem_align (ls, 128); 6821 6822 result = gen_reg_rtx (Pmode); 6823 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1)); 6824 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1)); 6825 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode, 6826 ls, const0_rtx, Pmode, 1); 6827 6828 emit_insn (gen_subsi3 (result, op, ls)); 6829 6830 return result; 6831 } 6832 6833 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC) 6834 { 6835 rtx result, ls; 6836 6837 ls = gen_const_mem (DImode, 6838 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store")); 6839 set_mem_align (ls, 128); 6840 6841 result = gen_reg_rtx (EAmode); 6842 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1)); 6843 op = force_reg (Pmode, op); 6844 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode, 6845 ls, const0_rtx, EAmode, 1); 6846 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1)); 6847 6848 if (EAmode == SImode) 6849 emit_insn (gen_addsi3 (result, op, ls)); 6850 else 6851 emit_insn (gen_adddi3 (result, op, ls)); 6852 6853 return result; 6854 } 6855 6856 else 6857 gcc_unreachable (); 6858} 6859 6860 6861/* Count the total number of instructions in each pipe and return the 6862 maximum, which is used as the Minimum Iteration Interval (MII) 6863 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1. 6864 -2 are instructions that can go in pipe0 or pipe1. */ 6865static int 6866spu_sms_res_mii (struct ddg *g) 6867{ 6868 int i; 6869 unsigned t[4] = {0, 0, 0, 0}; 6870 6871 for (i = 0; i < g->num_nodes; i++) 6872 { 6873 rtx_insn *insn = g->nodes[i].insn; 6874 int p = get_pipe (insn) + 2; 6875 6876 gcc_assert (p >= 0); 6877 gcc_assert (p < 4); 6878 6879 t[p]++; 6880 if (dump_file && INSN_P (insn)) 6881 fprintf (dump_file, "i%d %s %d %d\n", 6882 INSN_UID (insn), 6883 insn_data[INSN_CODE(insn)].name, 6884 p, t[p]); 6885 } 6886 if (dump_file) 6887 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]); 6888 6889 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3])); 6890} 6891 6892 6893void 6894spu_init_expanders (void) 6895{ 6896 if (cfun) 6897 { 6898 rtx r0, r1; 6899 /* HARD_FRAME_REGISTER is only 128 bit aligned when 6900 frame_pointer_needed is true. We don't know that until we're 6901 expanding the prologue. 
 */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
	 to be treated as aligned, so generate them here.  */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
    }
}

static machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* On SPU the word mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* On SPU the word mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}

/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load the ._ea section into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}

/* Implement targetm.select_section.  */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
    {
      /* We might get called with string constants, but get_named_section
	 doesn't like them as they are not DECLs.  Also, we need to set
	 flags in that case.  */
      if (!DECL_P (decl))
	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}

/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode.
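   For a non-constant SCALE the result is built at run time by splatting
   the single-precision encoding (127 + SCALE) << 23.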
*/ 6983rtx 6984spu_gen_exp2 (machine_mode mode, rtx scale) 6985{ 6986 gcc_assert (mode == V4SFmode); 6987 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT); 6988 if (GET_CODE (scale) != CONST_INT) 6989 { 6990 /* unsigned int exp = (127 + scale) << 23; 6991 __vector float m = (__vector float) spu_splats (exp); */ 6992 rtx reg = force_reg (SImode, scale); 6993 rtx exp = gen_reg_rtx (SImode); 6994 rtx mul = gen_reg_rtx (mode); 6995 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127))); 6996 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23))); 6997 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0))); 6998 return mul; 6999 } 7000 else 7001 { 7002 HOST_WIDE_INT exp = 127 + INTVAL (scale); 7003 unsigned char arr[16]; 7004 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1; 7005 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7; 7006 arr[2] = arr[6] = arr[10] = arr[14] = 0; 7007 arr[3] = arr[7] = arr[11] = arr[15] = 0; 7008 return array_to_constant (mode, arr); 7009 } 7010} 7011 7012/* After reload, just change the convert into a move instruction 7013 or a dead instruction. */ 7014void 7015spu_split_convert (rtx ops[]) 7016{ 7017 if (REGNO (ops[0]) == REGNO (ops[1])) 7018 emit_note (NOTE_INSN_DELETED); 7019 else 7020 { 7021 /* Use TImode always as this might help hard reg copyprop. */ 7022 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0])); 7023 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1])); 7024 emit_insn (gen_move_insn (op0, op1)); 7025 } 7026} 7027 7028void 7029spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED) 7030{ 7031 fprintf (file, "# profile\n"); 7032 fprintf (file, "brsl $75, _mcount\n"); 7033} 7034 7035/* Implement targetm.ref_may_alias_errno. */ 7036static bool 7037spu_ref_may_alias_errno (ao_ref *ref) 7038{ 7039 tree base = ao_ref_base (ref); 7040 7041 /* With SPU newlib, errno is defined as something like 7042 _impure_data._errno 7043 The default implementation of this target macro does not 7044 recognize such expressions, so special-code for it here. */ 7045 7046 if (TREE_CODE (base) == VAR_DECL 7047 && !TREE_STATIC (base) 7048 && DECL_EXTERNAL (base) 7049 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE 7050 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)), 7051 "_impure_data") == 0 7052 /* _errno is the first member of _impure_data. */ 7053 && ref->offset == 0) 7054 return true; 7055 7056 return default_ref_may_alias_errno (ref); 7057} 7058 7059/* Output thunk to FILE that implements a C++ virtual function call (with 7060 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer 7061 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment 7062 stored at VCALL_OFFSET in the vtable whose address is located at offset 0 7063 relative to the resulting this pointer. */ 7064 7065static void 7066spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 7067 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 7068 tree function) 7069{ 7070 rtx op[8]; 7071 7072 /* Make sure unwind info is emitted for the thunk if needed. */ 7073 final_start_function (emit_barrier (), file, 1); 7074 7075 /* Operand 0 is the target function. */ 7076 op[0] = XEXP (DECL_RTL (function), 0); 7077 7078 /* Operand 1 is the 'this' pointer. */ 7079 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 7080 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1); 7081 else 7082 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM); 7083 7084 /* Operands 2/3 are the low/high halfwords of delta. 
*/ 7085 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode)); 7086 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode)); 7087 7088 /* Operands 4/5 are the low/high halfwords of vcall_offset. */ 7089 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode)); 7090 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode)); 7091 7092 /* Operands 6/7 are temporary registers. */ 7093 op[6] = gen_rtx_REG (Pmode, 79); 7094 op[7] = gen_rtx_REG (Pmode, 78); 7095 7096 /* Add DELTA to this pointer. */ 7097 if (delta) 7098 { 7099 if (delta >= -0x200 && delta < 0x200) 7100 output_asm_insn ("ai\t%1,%1,%2", op); 7101 else if (delta >= -0x8000 && delta < 0x8000) 7102 { 7103 output_asm_insn ("il\t%6,%2", op); 7104 output_asm_insn ("a\t%1,%1,%6", op); 7105 } 7106 else 7107 { 7108 output_asm_insn ("ilhu\t%6,%3", op); 7109 output_asm_insn ("iohl\t%6,%2", op); 7110 output_asm_insn ("a\t%1,%1,%6", op); 7111 } 7112 } 7113 7114 /* Perform vcall adjustment. */ 7115 if (vcall_offset) 7116 { 7117 output_asm_insn ("lqd\t%7,0(%1)", op); 7118 output_asm_insn ("rotqby\t%7,%7,%1", op); 7119 7120 if (vcall_offset >= -0x200 && vcall_offset < 0x200) 7121 output_asm_insn ("ai\t%7,%7,%4", op); 7122 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000) 7123 { 7124 output_asm_insn ("il\t%6,%4", op); 7125 output_asm_insn ("a\t%7,%7,%6", op); 7126 } 7127 else 7128 { 7129 output_asm_insn ("ilhu\t%6,%5", op); 7130 output_asm_insn ("iohl\t%6,%4", op); 7131 output_asm_insn ("a\t%7,%7,%6", op); 7132 } 7133 7134 output_asm_insn ("lqd\t%6,0(%7)", op); 7135 output_asm_insn ("rotqby\t%6,%6,%7", op); 7136 output_asm_insn ("a\t%1,%1,%6", op); 7137 } 7138 7139 /* Jump to target. */ 7140 output_asm_insn ("br\t%0", op); 7141 7142 final_end_function (); 7143} 7144 7145/* Canonicalize a comparison from one we don't have to one we do have. */ 7146static void 7147spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1, 7148 bool op0_preserve_value) 7149{ 7150 if (!op0_preserve_value 7151 && (*code == LE || *code == LT || *code == LEU || *code == LTU)) 7152 { 7153 rtx tem = *op0; 7154 *op0 = *op1; 7155 *op1 = tem; 7156 *code = (int)swap_condition ((enum rtx_code)*code); 7157 } 7158} 7159 7160/* Table of machine attributes. */ 7161static const struct attribute_spec spu_attribute_table[] = 7162{ 7163 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, 7164 affects_type_identity } */ 7165 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute, 7166 false }, 7167 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute, 7168 false }, 7169 { NULL, 0, 0, false, false, false, NULL, false } 7170}; 7171 7172/* TARGET overrides. 
*/ 7173 7174#undef TARGET_ADDR_SPACE_POINTER_MODE 7175#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode 7176 7177#undef TARGET_ADDR_SPACE_ADDRESS_MODE 7178#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode 7179 7180#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P 7181#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ 7182 spu_addr_space_legitimate_address_p 7183 7184#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS 7185#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address 7186 7187#undef TARGET_ADDR_SPACE_SUBSET_P 7188#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p 7189 7190#undef TARGET_ADDR_SPACE_CONVERT 7191#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert 7192 7193#undef TARGET_INIT_BUILTINS 7194#define TARGET_INIT_BUILTINS spu_init_builtins 7195#undef TARGET_BUILTIN_DECL 7196#define TARGET_BUILTIN_DECL spu_builtin_decl 7197 7198#undef TARGET_EXPAND_BUILTIN 7199#define TARGET_EXPAND_BUILTIN spu_expand_builtin 7200 7201#undef TARGET_UNWIND_WORD_MODE 7202#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode 7203 7204#undef TARGET_LEGITIMIZE_ADDRESS 7205#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address 7206 7207/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long 7208 and .quad for the debugger. When it is known that the assembler is fixed, 7209 these can be removed. */ 7210#undef TARGET_ASM_UNALIGNED_SI_OP 7211#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" 7212 7213#undef TARGET_ASM_ALIGNED_DI_OP 7214#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" 7215 7216/* The .8byte directive doesn't seem to work well for a 32 bit 7217 architecture. */ 7218#undef TARGET_ASM_UNALIGNED_DI_OP 7219#define TARGET_ASM_UNALIGNED_DI_OP NULL 7220 7221#undef TARGET_RTX_COSTS 7222#define TARGET_RTX_COSTS spu_rtx_costs 7223 7224#undef TARGET_ADDRESS_COST 7225#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 7226 7227#undef TARGET_SCHED_ISSUE_RATE 7228#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate 7229 7230#undef TARGET_SCHED_INIT_GLOBAL 7231#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global 7232 7233#undef TARGET_SCHED_INIT 7234#define TARGET_SCHED_INIT spu_sched_init 7235 7236#undef TARGET_SCHED_VARIABLE_ISSUE 7237#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue 7238 7239#undef TARGET_SCHED_REORDER 7240#define TARGET_SCHED_REORDER spu_sched_reorder 7241 7242#undef TARGET_SCHED_REORDER2 7243#define TARGET_SCHED_REORDER2 spu_sched_reorder 7244 7245#undef TARGET_SCHED_ADJUST_COST 7246#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost 7247 7248#undef TARGET_ATTRIBUTE_TABLE 7249#define TARGET_ATTRIBUTE_TABLE spu_attribute_table 7250 7251#undef TARGET_ASM_INTEGER 7252#define TARGET_ASM_INTEGER spu_assemble_integer 7253 7254#undef TARGET_SCALAR_MODE_SUPPORTED_P 7255#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p 7256 7257#undef TARGET_VECTOR_MODE_SUPPORTED_P 7258#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p 7259 7260#undef TARGET_FUNCTION_OK_FOR_SIBCALL 7261#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall 7262 7263#undef TARGET_ASM_GLOBALIZE_LABEL 7264#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label 7265 7266#undef TARGET_PASS_BY_REFERENCE 7267#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference 7268 7269#undef TARGET_FUNCTION_ARG 7270#define TARGET_FUNCTION_ARG spu_function_arg 7271 7272#undef TARGET_FUNCTION_ARG_ADVANCE 7273#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance 7274 7275#undef TARGET_MUST_PASS_IN_STACK 
7276#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 7277 7278#undef TARGET_BUILD_BUILTIN_VA_LIST 7279#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list 7280 7281#undef TARGET_EXPAND_BUILTIN_VA_START 7282#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start 7283 7284#undef TARGET_SETUP_INCOMING_VARARGS 7285#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs 7286 7287#undef TARGET_MACHINE_DEPENDENT_REORG 7288#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg 7289 7290#undef TARGET_GIMPLIFY_VA_ARG_EXPR 7291#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr 7292 7293#undef TARGET_INIT_LIBFUNCS 7294#define TARGET_INIT_LIBFUNCS spu_init_libfuncs 7295 7296#undef TARGET_RETURN_IN_MEMORY 7297#define TARGET_RETURN_IN_MEMORY spu_return_in_memory 7298 7299#undef TARGET_ENCODE_SECTION_INFO 7300#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info 7301 7302#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 7303#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load 7304 7305#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 7306#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost 7307 7308#undef TARGET_VECTORIZE_INIT_COST 7309#define TARGET_VECTORIZE_INIT_COST spu_init_cost 7310 7311#undef TARGET_VECTORIZE_ADD_STMT_COST 7312#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost 7313 7314#undef TARGET_VECTORIZE_FINISH_COST 7315#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost 7316 7317#undef TARGET_VECTORIZE_DESTROY_COST_DATA 7318#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data 7319 7320#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE 7321#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable 7322 7323#undef TARGET_LIBGCC_CMP_RETURN_MODE 7324#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode 7325 7326#undef TARGET_LIBGCC_SHIFT_COUNT_MODE 7327#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode 7328 7329#undef TARGET_SCHED_SMS_RES_MII 7330#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii 7331 7332#undef TARGET_SECTION_TYPE_FLAGS 7333#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags 7334 7335#undef TARGET_ASM_SELECT_SECTION 7336#define TARGET_ASM_SELECT_SECTION spu_select_section 7337 7338#undef TARGET_ASM_UNIQUE_SECTION 7339#define TARGET_ASM_UNIQUE_SECTION spu_unique_section 7340 7341#undef TARGET_LEGITIMATE_ADDRESS_P 7342#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p 7343 7344#undef TARGET_LEGITIMATE_CONSTANT_P 7345#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p 7346 7347#undef TARGET_TRAMPOLINE_INIT 7348#define TARGET_TRAMPOLINE_INIT spu_trampoline_init 7349 7350#undef TARGET_WARN_FUNC_RETURN 7351#define TARGET_WARN_FUNC_RETURN spu_warn_func_return 7352 7353#undef TARGET_OPTION_OVERRIDE 7354#define TARGET_OPTION_OVERRIDE spu_option_override 7355 7356#undef TARGET_CONDITIONAL_REGISTER_USAGE 7357#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage 7358 7359#undef TARGET_REF_MAY_ALIAS_ERRNO 7360#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno 7361 7362#undef TARGET_ASM_OUTPUT_MI_THUNK 7363#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk 7364#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 7365#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 7366 7367/* Variable tracking should be run after all optimizations which 7368 change order of insns. It also needs a valid CFG. 
*/ 7369#undef TARGET_DELAY_VARTRACK 7370#define TARGET_DELAY_VARTRACK true 7371 7372#undef TARGET_CANONICALIZE_COMPARISON 7373#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison 7374 7375#undef TARGET_CAN_USE_DOLOOP_P 7376#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost 7377 7378struct gcc_target targetm = TARGET_INITIALIZER; 7379 7380#include "gt-spu.h" 7381