/* Subroutines used for code generation on the DEC Alpha.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "obstack.h"
#include "except.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "debug.h"
#include "langhooks.h"
#include "hash-map.h"
#include "hash-table.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-ssa.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-stdarg.h"
#include "tm-constrs.h"
#include "df.h"
#include "libfuncs.h"
#include "opts.h"
#include "params.h"
#include "builtins.h"
#include "rtl-iter.h"

/* Specify which cpu to schedule for.  */
enum processor_type alpha_tune;

/* Which cpu we're generating code for.  */
enum processor_type alpha_cpu;

static const char * const alpha_cpu_name[] =
{
  "ev4", "ev5", "ev6"
};

/* Specify how accurate floating-point traps need to be.  */

enum alpha_trap_precision alpha_tp;

/* Specify the floating-point rounding mode.  */

enum alpha_fp_rounding_mode alpha_fprm;

/* Specify which things cause traps.  */

enum alpha_fp_trap_mode alpha_fptm;

/* Nonzero if inside of a function, because the Alpha asm can't
   handle .files inside of functions.  */
static int inside_function = FALSE;

/* The number of cycles of latency we should assume on memory reads.  */

int alpha_memory_latency = 3;

/* Whether the function needs the GP.  */

static int alpha_function_needs_gp;

/* The assembler name of the current function.  */

static const char *alpha_fnname;

/* The next explicit relocation sequence number.  */
extern GTY(()) int alpha_next_sequence_number;
int alpha_next_sequence_number = 1;

/* The literal and gpdisp sequence numbers for this insn, as printed
   by %# and %* respectively.  */
extern GTY(()) int alpha_this_literal_sequence_number;
extern GTY(()) int alpha_this_gpdisp_sequence_number;
int alpha_this_literal_sequence_number;
int alpha_this_gpdisp_sequence_number;

/* Costs of various operations on the different architectures.  */

struct alpha_rtx_cost_data
{
  unsigned char fp_add;
  unsigned char fp_mult;
  unsigned char fp_div_sf;
  unsigned char fp_div_df;
  unsigned char int_mult_si;
  unsigned char int_mult_di;
  unsigned char int_shift;
  unsigned char int_cmov;
  unsigned short int_div;
};

static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
{
  { /* EV4 */
    COSTS_N_INSNS (6),		/* fp_add */
    COSTS_N_INSNS (6),		/* fp_mult */
    COSTS_N_INSNS (34),		/* fp_div_sf */
    COSTS_N_INSNS (63),		/* fp_div_df */
    COSTS_N_INSNS (23),		/* int_mult_si */
    COSTS_N_INSNS (23),		/* int_mult_di */
    COSTS_N_INSNS (2),		/* int_shift */
    COSTS_N_INSNS (2),		/* int_cmov */
    COSTS_N_INSNS (97),		/* int_div */
  },
  { /* EV5 */
    COSTS_N_INSNS (4),		/* fp_add */
    COSTS_N_INSNS (4),		/* fp_mult */
    COSTS_N_INSNS (15),		/* fp_div_sf */
    COSTS_N_INSNS (22),		/* fp_div_df */
    COSTS_N_INSNS (8),		/* int_mult_si */
    COSTS_N_INSNS (12),		/* int_mult_di */
    COSTS_N_INSNS (1) + 1,	/* int_shift */
    COSTS_N_INSNS (1),		/* int_cmov */
    COSTS_N_INSNS (83),		/* int_div */
  },
  { /* EV6 */
    COSTS_N_INSNS (4),		/* fp_add */
    COSTS_N_INSNS (4),		/* fp_mult */
    COSTS_N_INSNS (12),		/* fp_div_sf */
    COSTS_N_INSNS (15),		/* fp_div_df */
    COSTS_N_INSNS (7),		/* int_mult_si */
    COSTS_N_INSNS (7),		/* int_mult_di */
    COSTS_N_INSNS (1),		/* int_shift */
    COSTS_N_INSNS (2),		/* int_cmov */
    COSTS_N_INSNS (86),		/* int_div */
  },
};

/* Similar but tuned for code size instead of execution latency.  The
   extra +N is fractional cost tuning based on latency.  It's used to
   encourage use of cheaper insns like shift, but only if there's just
   one of them.  */

static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
{
  COSTS_N_INSNS (1),		/* fp_add */
  COSTS_N_INSNS (1),		/* fp_mult */
  COSTS_N_INSNS (1),		/* fp_div_sf */
  COSTS_N_INSNS (1) + 1,	/* fp_div_df */
  COSTS_N_INSNS (1) + 1,	/* int_mult_si */
  COSTS_N_INSNS (1) + 2,	/* int_mult_di */
  COSTS_N_INSNS (1),		/* int_shift */
  COSTS_N_INSNS (1),		/* int_cmov */
  COSTS_N_INSNS (6),		/* int_div */
};
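
/* A quick sanity check of the fractional tuning above, assuming the
   standard rtl.h definition COSTS_N_INSNS (N) == (N) * 4: int_mult_si
   is then 5 and int_shift is 4, so a single shift (4) still beats one
   multiply (5), while any two-insn replacement sequence (8) does not.  */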

/* Get the number of args of a function in one of two ways.  */
#if TARGET_ABI_OPEN_VMS
#define NUM_ARGS crtl->args.info.num_args
#else
#define NUM_ARGS crtl->args.info
#endif

#define REG_PV 27
#define REG_RA 26

/* Declarations of static functions.  */
static struct machine_function *alpha_init_machine_status (void);
static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
static void alpha_handle_trap_shadows (void);
static void alpha_align_insns (void);
static void alpha_override_options_after_change (void);

#if TARGET_ABI_OPEN_VMS
static void alpha_write_linkage (FILE *, const char *);
static bool vms_valid_pointer_mode (machine_mode);
#else
#define vms_patch_builtins()  gcc_unreachable()
#endif

static unsigned int
rest_of_handle_trap_shadows (void)
{
  alpha_handle_trap_shadows ();
  return 0;
}

namespace {

const pass_data pass_data_handle_trap_shadows =
{
  RTL_PASS,
  "trap_shadows",		/* name */
  OPTGROUP_NONE,		/* optinfo_flags */
  TV_NONE,			/* tv_id */
  0,				/* properties_required */
  0,				/* properties_provided */
  0,				/* properties_destroyed */
  0,				/* todo_flags_start */
  TODO_df_finish,		/* todo_flags_finish */
};

class pass_handle_trap_shadows : public rtl_opt_pass
{
public:
  pass_handle_trap_shadows(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_trap_shadows ();
    }

}; // class pass_handle_trap_shadows

} // anon namespace

rtl_opt_pass *
make_pass_handle_trap_shadows (gcc::context *ctxt)
{
  return new pass_handle_trap_shadows (ctxt);
}

static unsigned int
rest_of_align_insns (void)
{
  alpha_align_insns ();
  return 0;
}

namespace {

const pass_data pass_data_align_insns =
{
  RTL_PASS,
  "align_insns",		/* name */
  OPTGROUP_NONE,		/* optinfo_flags */
  TV_NONE,			/* tv_id */
  0,				/* properties_required */
  0,				/* properties_provided */
  0,				/* properties_destroyed */
  0,				/* todo_flags_start */
  TODO_df_finish,		/* todo_flags_finish */
};

class pass_align_insns : public rtl_opt_pass
{
public:
  pass_align_insns(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_align_insns, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* Due to the number of extra trapb insns, don't bother fixing up
	 alignment when trap precision is instruction.  Moreover, we can
	 only do our job when sched2 is run.  */
      return ((alpha_tune == PROCESSOR_EV4
	       || alpha_tune == PROCESSOR_EV5)
	      && optimize && !optimize_size
	      && alpha_tp != ALPHA_TP_INSN
	      && flag_schedule_insns_after_reload);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_align_insns ();
    }

}; // class pass_align_insns

} // anon namespace

rtl_opt_pass *
make_pass_align_insns (gcc::context *ctxt)
{
  return new pass_align_insns (ctxt);
}

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
alpha_mangle_type (const_tree type)
{
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif

/* Parse target option strings.  */

static void
alpha_option_override (void)
{
  static const struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int flags;
    const unsigned short line_size; /* in bytes */
    const unsigned short l1_size;   /* in kb.  */
    const unsigned short l2_size;   /* in kb.  */
  } cpu_table[] = {
    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
       had 64k to 8M 8-byte direct Bcache.  */
    { "ev4",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "21064",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "ev45",	PROCESSOR_EV4, 0, 32, 16, 16*1024 },

    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
       and 1M to 16M 64 byte L3 (not modeled).
       PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
       PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache.  */
    { "ev5",	PROCESSOR_EV5, 0, 32, 8, 96 },
    { "21164",	PROCESSOR_EV5, 0, 32, 8, 96 },
    { "ev56",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "21164a",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "pca56",	PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },

    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
    { "ev6",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "21264",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "ev67",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 },
    { "21264a",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 }
  };

  opt_pass *pass_handle_trap_shadows = make_pass_handle_trap_shadows (g);
  struct register_pass_info handle_trap_shadows_info
    = { pass_handle_trap_shadows, "eh_ranges",
	1, PASS_POS_INSERT_AFTER
      };

  opt_pass *pass_align_insns = make_pass_align_insns (g);
  struct register_pass_info align_insns_info
    = { pass_align_insns, "shorten",
	1, PASS_POS_INSERT_BEFORE
      };

  int const ct_size = ARRAY_SIZE (cpu_table);
  int line_size = 0, l1_size = 0, l2_size = 0;
  int i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Default to full IEEE compliance mode for Go language.  */
  if (strcmp (lang_hooks.name, "GNU Go") == 0
      && !(target_flags_explicit & MASK_IEEE))
    target_flags |= MASK_IEEE;

  alpha_fprm = ALPHA_FPRM_NORM;
  alpha_tp = ALPHA_TP_PROG;
  alpha_fptm = ALPHA_FPTM_N;

  if (TARGET_IEEE)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SU;
    }
  if (TARGET_IEEE_WITH_INEXACT)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SUI;
    }

  if (alpha_tp_string)
    {
      if (! strcmp (alpha_tp_string, "p"))
	alpha_tp = ALPHA_TP_PROG;
      else if (! strcmp (alpha_tp_string, "f"))
	alpha_tp = ALPHA_TP_FUNC;
      else if (! strcmp (alpha_tp_string, "i"))
	alpha_tp = ALPHA_TP_INSN;
      else
	error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
    }

  if (alpha_fprm_string)
    {
      if (! strcmp (alpha_fprm_string, "n"))
	alpha_fprm = ALPHA_FPRM_NORM;
      else if (! strcmp (alpha_fprm_string, "m"))
	alpha_fprm = ALPHA_FPRM_MINF;
      else if (! strcmp (alpha_fprm_string, "c"))
	alpha_fprm = ALPHA_FPRM_CHOP;
      else if (! strcmp (alpha_fprm_string, "d"))
	alpha_fprm = ALPHA_FPRM_DYN;
      else
	error ("bad value %qs for -mfp-rounding-mode switch",
	       alpha_fprm_string);
    }

  if (alpha_fptm_string)
    {
      if (strcmp (alpha_fptm_string, "n") == 0)
	alpha_fptm = ALPHA_FPTM_N;
      else if (strcmp (alpha_fptm_string, "u") == 0)
	alpha_fptm = ALPHA_FPTM_U;
      else if (strcmp (alpha_fptm_string, "su") == 0)
	alpha_fptm = ALPHA_FPTM_SU;
      else if (strcmp (alpha_fptm_string, "sui") == 0)
	alpha_fptm = ALPHA_FPTM_SUI;
      else
	error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
    }

  if (alpha_cpu_string)
    {
      for (i = 0; i < ct_size; i++)
	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
	  {
	    alpha_tune = alpha_cpu = cpu_table[i].processor;
	    line_size = cpu_table[i].line_size;
	    l1_size = cpu_table[i].l1_size;
	    l2_size = cpu_table[i].l2_size;
	    target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
	    target_flags |= cpu_table[i].flags;
	    break;
	  }
      if (i == ct_size)
	error ("bad value %qs for -mcpu switch", alpha_cpu_string);
    }

  if (alpha_tune_string)
    {
      for (i = 0; i < ct_size; i++)
	if (! strcmp (alpha_tune_string, cpu_table [i].name))
	  {
	    alpha_tune = cpu_table[i].processor;
	    line_size = cpu_table[i].line_size;
	    l1_size = cpu_table[i].l1_size;
	    l2_size = cpu_table[i].l2_size;
	    break;
	  }
      if (i == ct_size)
	error ("bad value %qs for -mtune switch", alpha_tune_string);
    }

  if (line_size)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (l1_size)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (l2_size)
    maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
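
  /* For example, -mcpu=ev6 selects the "ev6" cpu_table entry above, so
     PARAM_L1_CACHE_LINE_SIZE becomes 64 (bytes), PARAM_L1_CACHE_SIZE
     becomes 64 (kb) and PARAM_L2_CACHE_SIZE becomes 16384 (kb), unless
     the user set those params explicitly.  */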

  /* Do some sanity checks on the above options.  */

  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
    {
      warning (0, "fp software completion requires -mtrap-precision=i");
      alpha_tp = ALPHA_TP_INSN;
    }

  if (alpha_cpu == PROCESSOR_EV6)
    {
      /* Except for EV6 pass 1 (not released), we always have precise
	 arithmetic traps.  Which means we can do software completion
	 without minding trap shadows.  */
      alpha_tp = ALPHA_TP_PROG;
    }

  if (TARGET_FLOAT_VAX)
    {
      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
	{
	  warning (0, "rounding mode not supported for VAX floats");
	  alpha_fprm = ALPHA_FPRM_NORM;
	}
      if (alpha_fptm == ALPHA_FPTM_SUI)
	{
	  warning (0, "trap mode not supported for VAX floats");
	  alpha_fptm = ALPHA_FPTM_SU;
	}
      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
	warning (0, "128-bit long double not supported for VAX floats");
      target_flags &= ~MASK_LONG_DOUBLE_128;
    }

  {
    char *end;
    int lat;

    if (!alpha_mlat_string)
      alpha_mlat_string = "L1";

    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
      ;
    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
	     && alpha_mlat_string[2] == '\0')
      {
	static int const cache_latency[][4] =
	{
	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
	};

	lat = alpha_mlat_string[1] - '0';
	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
	  {
	    warning (0, "L%d cache latency unknown for %s",
		     lat, alpha_cpu_name[alpha_tune]);
	    lat = 3;
	  }
	else
	  lat = cache_latency[alpha_tune][lat-1];
      }
    else if (! strcmp (alpha_mlat_string, "main"))
      {
	/* Most current memories have about 370ns latency.  This is
	   a reasonable guess for a fast cpu.  */
	lat = 150;
      }
    else
      {
	warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
	lat = 3;
      }

    alpha_memory_latency = lat;
  }

  /* Default the definition of "small data" to 8 bytes.  */
  if (!global_options_set.x_g_switch_value)
    g_switch_value = 8;

  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
  if (flag_pic == 1)
    target_flags |= MASK_SMALL_DATA;
  else if (flag_pic == 2)
    target_flags &= ~MASK_SMALL_DATA;

  alpha_override_options_after_change ();

  /* Register variables and functions with the garbage collector.  */

  /* Set up function hooks.  */
  init_machine_status = alpha_init_machine_status;

  /* Tell the compiler when we're using VAX floating point.  */
  if (TARGET_FLOAT_VAX)
    {
      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
      REAL_MODE_FORMAT (TFmode) = NULL;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&handle_trap_shadows_info);
  register_pass (&align_insns_info);
}

/* Implement targetm.override_options_after_change.  */

static void
alpha_override_options_after_change (void)
{
  /* Align labels and loops for optimal branching.  */
  /* ??? Kludge these by not doing anything if we don't optimize.  */
  if (optimize > 0)
    {
      if (align_loops <= 0)
	align_loops = 16;
      if (align_jumps <= 0)
	align_jumps = 16;
    }
  if (align_functions <= 0)
    align_functions = 16;
}

/* Returns 1 if VALUE is a mask that contains full bytes of zeros or ones.  */

int
zap_mask (HOST_WIDE_INT value)
{
  int i;

  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
       i++, value >>= 8)
    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
      return 0;

  return 1;
}
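
/* For illustration: zap_mask accepts 0xff00ff0000ff00ff (every byte
   either 0x00 or 0xff) but rejects 0x00000000000000f0, whose low byte
   is neither.  These are exactly the AND masks realizable by a single
   zap/zapnot byte-mask operation.  */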

/* Return true if OP is valid for a particular TLS relocation.
   We are already guaranteed that OP is a CONST.  */

int
tls_symbolic_operand_1 (rtx op, int size, int unspec)
{
  op = XEXP (op, 0);

  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
    return 0;
  op = XVECEXP (op, 0, 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  switch (SYMBOL_REF_TLS_MODEL (op))
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
    case TLS_MODEL_INITIAL_EXEC:
      return unspec == UNSPEC_TPREL && size == 64;
    case TLS_MODEL_LOCAL_EXEC:
      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
    default:
      gcc_unreachable ();
    }
}

/* Used by aligned_memory_operand and unaligned_memory_operand to
   resolve what reload is going to do with OP if it's a register.  */

rtx
resolve_reload_operand (rtx op)
{
  if (reload_in_progress)
    {
      rtx tmp = op;
      if (GET_CODE (tmp) == SUBREG)
	tmp = SUBREG_REG (tmp);
      if (REG_P (tmp)
	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
	{
	  op = reg_equiv_memory_loc (REGNO (tmp));
	  if (op == 0)
	    return 0;
	}
    }
  return op;
}

/* The scalar modes supported differ from the default check-what-c-supports
   version in that sometimes TFmode is available even when long double
   indicates only DFmode.  */

static bool
alpha_scalar_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode: /* via optabs.c */
      return true;

    case SFmode:
    case DFmode:
      return true;

    case TFmode:
      return TARGET_HAS_XFLOATING_LIBS;

    default:
      return false;
    }
}

/* Alpha implements a couple of integer vector mode operations when
   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
   which allows the vectorizer to operate on e.g. move instructions,
   or when expand_vector_operations can do something useful.  */

static bool
alpha_vector_mode_supported_p (machine_mode mode)
{
  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
}

/* Return 1 if this function can directly return via $26.  */

int
direct_return (void)
{
  return (TARGET_ABI_OSF
	  && reload_completed
	  && alpha_sa_size () == 0
	  && get_frame_size () == 0
	  && crtl->outgoing_args_size == 0
	  && crtl->args.pretend_args_size == 0);
}

/* Return the TLS model to use for SYMBOL.  */

static enum tls_model
tls_symbolic_operand_type (rtx symbol)
{
  enum tls_model model;

  if (GET_CODE (symbol) != SYMBOL_REF)
    return TLS_MODEL_NONE;
  model = SYMBOL_REF_TLS_MODEL (symbol);

  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
    model = TLS_MODEL_INITIAL_EXEC;

  return model;
}

/* Return true if the function DECL will share the same GP as any
   function in the current unit of translation.  */

static bool
decl_has_samegp (const_tree decl)
{
  /* Functions that are not local can be overridden, and thus may
     not share the same gp.  */
  if (!(*targetm.binds_local_p) (decl))
    return false;

  /* If -msmall-data is in effect, assume that there is only one GP
     for the module, and so any local symbol has this property.  We
     need explicit relocations to be able to enforce this for symbols
     not defined in this unit of translation, however.  */
  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
    return true;

  /* Functions that are not external are defined in this UoT.  */
  /* ??? Irritatingly, static functions not yet emitted are still
     marked "external".  Apply this to non-static functions only.  */
  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
}

/* Return true if EXP should be placed in the small data section.  */

static bool
alpha_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  Duh.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* COMMON symbols are never small data.  */
  if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= g_switch_value)
	return true;
    }

  return false;
}

#if TARGET_ABI_OPEN_VMS
static bool
vms_valid_pointer_mode (machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}

static bool
alpha_linkage_symbol_p (const char *symname)
{
  int symlen = strlen (symname);

  if (symlen > 4)
    return strcmp (&symname [symlen - 4], "..lk") == 0;

  return false;
}

#define LINKAGE_SYMBOL_REF_P(X) \
  ((GET_CODE (X) == SYMBOL_REF   \
    && alpha_linkage_symbol_p (XSTR (X, 0))) \
   || (GET_CODE (X) == CONST                 \
       && GET_CODE (XEXP (X, 0)) == PLUS     \
       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
#endif

/* legitimate_address_p recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   For Alpha, we have either a constant address or the sum of a
   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
   low-order three bits; this is an "unaligned" access.  */

static bool
alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  /* If this is an ldq_u type address, discard the outer AND.  */
  if (mode == DImode
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -8)
    x = XEXP (x, 0);

  /* Discard non-paradoxical subregs.  */
  if (GET_CODE (x) == SUBREG
      && (GET_MODE_SIZE (GET_MODE (x))
	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
    x = SUBREG_REG (x);

  /* Unadorned general registers are valid.  */
  if (REG_P (x)
      && (strict
	  ? STRICT_REG_OK_FOR_BASE_P (x)
	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
    return true;

  /* Constant addresses (i.e. +/- 32k) are valid.  */
  if (CONSTANT_ADDRESS_P (x))
    return true;

#if TARGET_ABI_OPEN_VMS
  if (LINKAGE_SYMBOL_REF_P (x))
    return true;
#endif

  /* Register plus a small constant offset is valid.  */
  if (GET_CODE (x) == PLUS)
    {
      rtx ofs = XEXP (x, 1);
      x = XEXP (x, 0);

      /* Discard non-paradoxical subregs.  */
      if (GET_CODE (x) == SUBREG
	  && (GET_MODE_SIZE (GET_MODE (x))
	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
	x = SUBREG_REG (x);

      if (REG_P (x))
	{
	  if (! strict
	      && NONSTRICT_REG_OK_FP_BASE_P (x)
	      && CONST_INT_P (ofs))
	    return true;
	  if ((strict
	       ? STRICT_REG_OK_FOR_BASE_P (x)
	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
	      && CONSTANT_ADDRESS_P (ofs))
	    return true;
	}
    }

  /* If we're managing explicit relocations, LO_SUM is valid, as are small
     data symbols.  Avoid explicit relocations of modes larger than word
     mode since e.g. $LC0+8($1) can fold around +/- 32k offset.  */
  else if (TARGET_EXPLICIT_RELOCS
	   && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
    {
      if (small_symbolic_operand (x, Pmode))
	return true;

      if (GET_CODE (x) == LO_SUM)
	{
	  rtx ofs = XEXP (x, 1);
	  x = XEXP (x, 0);

	  /* Discard non-paradoxical subregs.  */
	  if (GET_CODE (x) == SUBREG
	      && (GET_MODE_SIZE (GET_MODE (x))
		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
	    x = SUBREG_REG (x);

	  /* Must have a valid base register.  */
	  if (! (REG_P (x)
		 && (strict
		     ? STRICT_REG_OK_FOR_BASE_P (x)
		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
	    return false;

	  /* The symbol must be local.  */
	  if (local_symbolic_operand (ofs, Pmode)
	      || dtp32_symbolic_operand (ofs, Pmode)
	      || tp32_symbolic_operand (ofs, Pmode))
	    return true;
	}
    }

  return false;
}
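
/* By way of example, each of the following is accepted by the predicate
   above for a DImode access (the LO_SUM form only with
   TARGET_EXPLICIT_RELOCS and a local symbol):

	(reg 9)
	(plus (reg 9) (const_int 4096))
	(and (plus (reg 9) (const_int 5)) (const_int -8))	; ldq_u
	(lo_sum (reg 9) (symbol_ref "x"))  */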

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */

static rtx
alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
{
  HOST_WIDE_INT addend;

  /* If the address is (plus reg const_int) and the CONST_INT is not a
     valid offset, compute the high part of the constant and add it to
     the register.  Then our address is (plus temp low-part-const).  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
    {
      addend = INTVAL (XEXP (x, 1));
      x = XEXP (x, 0);
      goto split_addend;
    }

  /* If the address is (const (plus FOO const_int)), find the low-order
     part of the CONST_INT.  Then load FOO plus any high-order part of the
     CONST_INT into a register.  Our address is (plus reg low-part-const).
     This is done to reduce the number of GOT entries.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (x, 0), 1));
      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
      goto split_addend;
    }

  /* If we have a (plus reg const), emit the load as in (2), then add
     the two registers, and finally generate (plus reg low-part-const) as
     our address.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && GET_CODE (XEXP (x, 1)) == CONST
      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
			       XEXP (XEXP (XEXP (x, 1), 0), 0),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      goto split_addend;
    }

  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
     Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
     around +/- 32k offset.  */
  if (TARGET_EXPLICIT_RELOCS
      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
      && symbolic_operand (x, Pmode))
    {
      rtx r0, r16, eqv, tga, tp, insn, dest, seq;

      switch (tls_symbolic_operand_type (x))
	{
	case TLS_MODEL_NONE:
	  break;
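
	/* For the global-dynamic case below, the emitted RTL amounts to
	   a call to __tls_get_addr with the argument set up in $16 and
	   the result returned in $0; as a rough assembly sketch
	   (relocation sequence numbers omitted):

		lda	$16,x($gp)		!tlsgd
		ldq	$27,__tls_get_addr($gp)	!literal
		jsr	$26,($27)		!lituse_tlsgd  */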

	case TLS_MODEL_GLOBAL_DYNAMIC:
	  start_sequence ();

	  r0 = gen_rtx_REG (Pmode, 0);
	  r16 = gen_rtx_REG (Pmode, 16);
	  tga = get_tls_get_addr ();
	  dest = gen_reg_rtx (Pmode);
	  seq = GEN_INT (alpha_next_sequence_number++);

	  emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
	  insn = gen_call_value_osf_tlsgd (r0, tga, seq);
	  insn = emit_call_insn (insn);
	  RTL_CONST_CALL_P (insn) = 1;
	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

	  insn = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insn, dest, r0, x);
	  return dest;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  start_sequence ();

	  r0 = gen_rtx_REG (Pmode, 0);
	  r16 = gen_rtx_REG (Pmode, 16);
	  tga = get_tls_get_addr ();
	  scratch = gen_reg_rtx (Pmode);
	  seq = GEN_INT (alpha_next_sequence_number++);

	  emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
	  insn = gen_call_value_osf_tlsldm (r0, tga, seq);
	  insn = emit_call_insn (insn);
	  RTL_CONST_CALL_P (insn) = 1;
	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

	  insn = get_insns ();
	  end_sequence ();

	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_TLSLDM_CALL);
	  emit_libcall_block (insn, scratch, r0, eqv);

	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);

	  if (alpha_tls_size == 64)
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
	      emit_insn (gen_adddi3 (dest, dest, scratch));
	      return dest;
	    }
	  if (alpha_tls_size == 32)
	    {
	      insn = gen_rtx_HIGH (Pmode, eqv);
	      insn = gen_rtx_PLUS (Pmode, scratch, insn);
	      scratch = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
	    }
	  return gen_rtx_LO_SUM (Pmode, scratch, eqv);

	case TLS_MODEL_INITIAL_EXEC:
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);
	  tp = gen_reg_rtx (Pmode);
	  scratch = gen_reg_rtx (Pmode);
	  dest = gen_reg_rtx (Pmode);

	  emit_insn (gen_get_thread_pointerdi (tp));
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
	  emit_insn (gen_adddi3 (dest, tp, scratch));
	  return dest;

	case TLS_MODEL_LOCAL_EXEC:
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);
	  tp = gen_reg_rtx (Pmode);

	  emit_insn (gen_get_thread_pointerdi (tp));
	  if (alpha_tls_size == 32)
	    {
	      insn = gen_rtx_HIGH (Pmode, eqv);
	      insn = gen_rtx_PLUS (Pmode, tp, insn);
	      tp = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
	    }
	  return gen_rtx_LO_SUM (Pmode, tp, eqv);

	default:
	  gcc_unreachable ();
	}

      if (local_symbolic_operand (x, Pmode))
	{
	  if (small_symbolic_operand (x, Pmode))
	    return x;
	  else
	    {
	      if (can_create_pseudo_p ())
		scratch = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, scratch,
				      gen_rtx_HIGH (Pmode, x)));
	      return gen_rtx_LO_SUM (Pmode, scratch, x);
	    }
	}
    }

  return NULL;

 split_addend:
  {
    HOST_WIDE_INT low, high;

    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
    addend -= low;
    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
    addend -= high;

    if (addend)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
			       1, OPTAB_LIB_WIDEN);
    if (high)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
			       1, OPTAB_LIB_WIDEN);

    return plus_constant (Pmode, x, low);
  }
}
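
/* A worked instance of the split_addend arithmetic above: for an
   addend of 0x9876, LOW == ((0x9876 ^ 0x8000) - 0x8000) == -0x678a,
   which leaves 0x10000 for HIGH and nothing beyond that, so the
   address is rebuilt as ((x + 0x10000) + -0x678a) -- an ldah/lda
   pair around the original base.  */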

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  Return X or the new, valid address.  */

static rtx
alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
  return new_x ? new_x : x;
}

/* Return true if ADDR has an effect that depends on the machine mode it
   is used for.  On the Alpha this is true only for the unaligned modes.
   We can simplify the test since we know that the address must be valid.  */

static bool
alpha_mode_dependent_address_p (const_rtx addr,
				addr_space_t as ATTRIBUTE_UNUSED)
{
  return GET_CODE (addr) == AND;
}

/* Primarily this is required for TLS symbols, but given that our move
   patterns *ought* to be able to handle any symbol at any time, we
   should never be spilling symbolic operands to the constant pool, ever.  */

static bool
alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
}

/* We do not allow indirect calls to be optimized into sibling calls, nor
   can we allow a call to a function with a different GP to be optimized
   into a sibcall.  */

static bool
alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Can't do indirect tail calls, since we don't know if the target
     uses the same GP.  */
  if (!decl)
    return false;

  /* Otherwise, we can make a tail call if the target function shares
     the same GP.  */
  return decl_has_samegp (decl);
}

bool
some_small_symbolic_operand_int (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
    {
      rtx x = *iter;
      /* Don't re-split.  */
      if (GET_CODE (x) == LO_SUM)
	iter.skip_subrtxes ();
      else if (small_symbolic_operand (x, Pmode))
	return true;
    }
  return false;
}

rtx
split_small_symbolic_operand (rtx x)
{
  x = copy_insn (x);
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
    {
      rtx *ptr = *iter;
      rtx x = *ptr;
      /* Don't re-split.  */
      if (GET_CODE (x) == LO_SUM)
	iter.skip_subrtxes ();
      else if (small_symbolic_operand (x, Pmode))
	{
	  *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
	  iter.skip_subrtxes ();
	}
    }
  return x;
}

/* Indicate that INSN cannot be duplicated.  This is true for any insn
   that we've marked with gpdisp relocs, since those have to stay in
   1-1 correspondence with one another.

   Technically we could copy them if we could set up a mapping from one
   sequence number to another, across the set of insns to be duplicated.
   This seems overly complicated and error-prone since interblock motion
   from sched-ebb could move one of the pair of insns to a different block.

   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
   then they'll be in a different block from their ldgp.  Which could lead
   the bb reorder code to think that it would be ok to copy just the block
   containing the call and branch to the block containing the ldgp.  */

static bool
alpha_cannot_copy_insn_p (rtx_insn *insn)
{
  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
    return false;
  if (recog_memoized (insn) >= 0)
    return get_attr_cannot_copy (insn);
  else
    return false;
}

/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
alpha_legitimize_reload_address (rtx x,
				 machine_mode mode ATTRIBUTE_UNUSED,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by
     splitting the addend across an ldah and the mem insn.  This
     cuts the number of extra insns needed from 3 to 1.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT high
	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow.  */
      if (high + low != val)
	return NULL_RTX;

      /* Reload the high part into a base reg; leave the low part
	 in the mem directly.  */
      x = gen_rtx_PLUS (GET_MODE (x),
			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
				      GEN_INT (high)),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
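
/* For instance, a displacement of 0x12345678 off a base register
   splits here as HIGH == 0x12340000 (one ldah of 0x1234, reloaded
   into a base reg) and LOW == 0x5678, which stays as the in-insn
   displacement.  */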

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
		 bool speed)
{
  machine_mode mode = GET_MODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);
  const struct alpha_rtx_cost_data *cost_data;

  if (!speed)
    cost_data = &alpha_rtx_cost_size;
  else
    cost_data = &alpha_rtx_cost_data[alpha_tune];

  switch (code)
    {
    case CONST_INT:
      /* If this is an 8-bit constant, return zero since it can be used
	 nearly anywhere with no cost.  If it is a valid operand for an
	 ADD or AND, likewise return 0 if we know it will be used in that
	 context.  Otherwise, return 2 since it might be used there later.
	 All other constants take at least two insns.  */
      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
	{
	  *total = 0;
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
      if (x == CONST0_RTX (mode))
	*total = 0;
      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
	       || (outer_code == AND && and_operand (x, VOIDmode)))
	*total = 0;
      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
	*total = 2;
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
	*total = COSTS_N_INSNS (outer_code != MEM);
      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
      else if (tls_symbolic_operand_type (x))
	/* Estimate of cost for call_pal rduniq.  */
	/* ??? How many insns do we emit here?  More than one...  */
	*total = COSTS_N_INSNS (15);
      else
	/* Otherwise we do a load from the GOT.  */
	*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case HIGH:
      /* This is effectively an add_operand.  */
      *total = 2;
      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = cost_data->fp_add;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
	{
	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
			      (enum rtx_code) outer_code, opno, speed)
		    + rtx_cost (XEXP (x, 1),
				(enum rtx_code) outer_code, opno, speed)
		    + COSTS_N_INSNS (1));
	  return true;
	}
      return false;

    case MULT:
      if (float_mode_p)
	*total = cost_data->fp_mult;
      else if (mode == DImode)
	*total = cost_data->int_mult_di;
      else
	*total = cost_data->int_mult_si;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && INTVAL (XEXP (x, 1)) <= 3)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ASHIFTRT:
    case LSHIFTRT:
      *total = cost_data->int_shift;
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = cost_data->fp_add;
      else
	*total = cost_data->int_cmov;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (!float_mode_p)
	*total = cost_data->int_div;
      else if (mode == SFmode)
	*total = cost_data->fp_div_sf;
      else
	*total = cost_data->fp_div_df;
      return false;

    case MEM:
      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case NEG:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = cost_data->fp_add;
      return false;

    case FLOAT_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	*total = 0;
      else
	*total = cost_data->fp_add;
      return false;

    default:
      return false;
    }
}
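
/* Note that in the PLUS/MINUS case above, (plus (mult X 4) Y) -- and
   likewise a multiplier of 8, which is what const48_operand accepts --
   is charged as one insn plus the cost of its operands, matching the
   scaled-add instructions (s4addq and friends) rather than a separate
   multiply.  */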

/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  SCRATCH is a free register for use in reloading out
   of range stack slots.  */

void
get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
{
  rtx base;
  HOST_WIDE_INT disp, offset;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
  else
    disp = 0;

  /* Find the byte offset within an aligned word.  If the memory itself is
     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
     will have examined the base register and determined it is aligned, and
     thus displacements from it are naturally alignable.  */
  if (MEM_ALIGN (ref) >= 32)
    offset = 0;
  else
    offset = disp & 3;

  /* The location should not cross aligned word boundary.  */
  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
	      <= GET_MODE_SIZE (SImode));

  /* Access the entire aligned word.  */
  *paligned_mem = widen_memory_access (ref, SImode, -offset);

  /* Convert the byte offset within the word to a bit offset.  */
  offset *= BITS_PER_UNIT;
  *pbitnum = GEN_INT (offset);
}

/* Similar, but just get the address.  Handle the two reload cases.  */

rtx
get_unaligned_address (rtx ref)
{
  rtx base;
  HOST_WIDE_INT offset = 0;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);

  return plus_constant (Pmode, base, offset);
}

/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
   X is always returned in a register.  */

rtx
get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
{
  if (GET_CODE (addr) == PLUS)
    {
      ofs += INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
}
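
/* To make the aligned case concrete: a QImode ref at (plus $9 6) with
   no alignment info gives offset == (6 & 3) == 2 in get_aligned_mem,
   so *PALIGNED_MEM is an SImode access at (plus $9 4) and *PBITNUM is
   16 -- the byte lives 16 bits into that aligned word.  */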

/* On the Alpha, all (non-symbolic) constants except zero go into
   a floating-point register via memory.  Note that we cannot
   return anything that is not a subset of RCLASS, and that some
   symbolic constants cannot be dropped to memory.  */

enum reg_class
alpha_preferred_reload_class(rtx x, enum reg_class rclass)
{
  /* Zero is present in any register class.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return rclass;

  /* These sorts of constants we can easily drop to memory.  */
  if (CONST_INT_P (x)
      || GET_CODE (x) == CONST_DOUBLE
      || GET_CODE (x) == CONST_VECTOR)
    {
      if (rclass == FLOAT_REGS)
	return NO_REGS;
      if (rclass == ALL_REGS)
	return GENERAL_REGS;
      return rclass;
    }

  /* All other kinds of constants should not (and in the case of HIGH
     cannot) be dropped to memory -- instead we use a GENERAL_REGS
     secondary reload.  */
  if (CONSTANT_P (x))
    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);

  return rclass;
}

/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static reg_class_t
alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Loading and storing HImode or QImode values to and from memory
     usually requires a scratch register.  */
  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
    {
      if (any_memory_operand (x, mode))
	{
	  if (in_p)
	    {
	      if (!aligned_memory_operand (x, mode))
		sri->icode = direct_optab_handler (reload_in_optab, mode);
	    }
	  else
	    sri->icode = direct_optab_handler (reload_out_optab, mode);
	  return NO_REGS;
	}
    }

  /* We also cannot do integral arithmetic into FP regs, as might result
     from register elimination into a DImode fp register.  */
  if (rclass == FLOAT_REGS)
    {
      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
	return GENERAL_REGS;
      if (in_p && INTEGRAL_MODE_P (mode)
	  && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
	return GENERAL_REGS;
    }

  return NO_REGS;
}

/* Given SEQ, which is an INSN list, look for any MEMs in either
   a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
   volatile flags from REF into each of the MEMs found.  If REF is not
   a MEM, don't do anything.  */

void
alpha_set_memflags (rtx seq, rtx ref)
{
  rtx_insn *insn;

  if (!MEM_P (ref))
    return;

  /* This is only called from alpha.md, after having had something
     generated from one of the insn patterns.  So if everything is
     zero, the pattern is already up-to-date.  */
  if (!MEM_VOLATILE_P (ref)
      && !MEM_NOTRAP_P (ref)
      && !MEM_READONLY_P (ref))
    return;

  subrtx_var_iterator::array_type array;
  for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
	{
	  rtx x = *iter;
	  if (MEM_P (x))
	    {
	      MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
	      MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
	      MEM_READONLY_P (x) = MEM_READONLY_P (ref);
	      /* Sadly, we cannot use alias sets because the extra
		 aliasing produced by the AND interferes.  Given that
		 two-byte quantities are the only thing we would be
		 able to differentiate anyway, there does not seem to
		 be any point in convoluting the early out of the
		 alias check.  */
	      iter.skip_subrtxes ();
	    }
	}
    else
      gcc_unreachable ();
}

static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
				 int, bool);

/* Internal routine for alpha_emit_set_const to check for N or below insns.
   If NO_OUTPUT is true, then we only check to see if N insns are possible,
   and return pc_rtx if successful.  */

static rtx
alpha_emit_set_const_1 (rtx target, machine_mode mode,
			HOST_WIDE_INT c, int n, bool no_output)
{
  HOST_WIDE_INT new_const;
  int i, bits;
  /* Use a pseudo if highly optimizing and still generating RTL.  */
  rtx subtarget
    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
  rtx temp, insn;

  /* If this is a sign-extended 32-bit constant, we can do this in at most
     three insns, so do it if we have enough insns left.  We always have
     a sign-extended 32-bit constant when compiling on a narrow machine.  */

  if (HOST_BITS_PER_WIDE_INT != 64
      || c >> 31 == -1 || c >> 31 == 0)
    {
      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT tmp1 = c - low;
      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT extra = 0;

      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldah insns.  */

      if ((high & 0x8000) != 0 && c >= 0)
	{
	  extra = 0x4000;
	  tmp1 -= 0x40000000;
	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
	}

      if (c == low || (low == 0 && extra == 0))
	{
	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
	     but that meant that we can't handle INT_MIN on 32-bit machines
	     (like NT/Alpha), because we recurse indefinitely through
	     emit_move_insn to gen_movdi.  So instead, since we know exactly
	     what we want, create it explicitly.  */

	  if (no_output)
	    return pc_rtx;
	  if (target == NULL)
	    target = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
	  return target;
	}
      else if (n >= 2 + (extra != 0))
	{
	  if (no_output)
	    return pc_rtx;
	  if (!can_create_pseudo_p ())
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
	      temp = target;
	    }
	  else
	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
					  subtarget, mode);

	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
	     This means that if we go through expand_binop, we'll try to
	     generate extensions, etc, which will require new pseudos, which
	     will fail during some split phases.  The SImode add patterns
	     still exist, but are not named.  So build the insns by hand.  */

	  if (extra != 0)
	    {
	      if (! subtarget)
		subtarget = gen_reg_rtx (mode);
	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
	      insn = gen_rtx_SET (VOIDmode, subtarget, insn);
	      emit_insn (insn);
	      temp = subtarget;
	    }

	  if (target == NULL)
	    target = gen_reg_rtx (mode);
	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	  insn = gen_rtx_SET (VOIDmode, target, insn);
	  emit_insn (insn);
	  return target;
	}
    }
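
  /* To see the decomposition at work: c == 0x12348000 gives
     low == -0x8000 and high == 0x1235, so it is built as
     ldah (0x1235 << 16) followed by lda (-0x8000).  A constant such
     as 0x7fff8000 takes the EXTRA path: HIGH alone would flip the
     sign, so 0x4000 << 16 is added in twice instead.  */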

  /* If we couldn't do it that way, try some other methods.  But if we have
     no instructions left, don't bother.  Likewise, if this is SImode and
     we can't make pseudos, we can't do anything since the expand_binop
     and expand_unop calls will widen and try to make pseudos.  */

  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
    return 0;

  /* Next, see if we can load a related constant and then shift and possibly
     negate it to get the constant we want.  Try this once each increasing
     numbers of insns.  */

  for (i = 1; i < n; i++)
    {
      /* First, see if minus some low bits, we've an easy load of
	 high bits.  */

      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
      if (new_const != 0)
	{
	  temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
	  if (temp)
	    {
	      if (no_output)
		return temp;
	      return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
				   target, 0, OPTAB_WIDEN);
	    }
	}

      /* Next try complementing.  */
      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
      if (temp)
	{
	  if (no_output)
	    return temp;
	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
	}

      /* Next try to form a constant and do a left shift.  We can do this
	 if some low-order bits are zero; the exact_log2 call below tells
	 us that information.  The bits we are shifting out could be any
	 value, but here we'll just try the 0- and sign-extended forms of
	 the constant.  To try to increase the chance of having the same
	 constant in more than one insn, start at the highest number of
	 bits to shift, but try all possibilities in case a ZAPNOT will
	 be useful.  */

      bits = exact_log2 (c & -c);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c >> bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp && c < 0)
	      {
		new_const = (unsigned HOST_WIDE_INT)c >> bits;
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
				     target, 0, OPTAB_WIDEN);
	      }
	  }
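
      /* For instance, c == 0x2340000 has 18 trailing zero bits, so the
	 loop above can load 0x8d with a single lda and then shift it
	 left 18 places to recover the constant.  */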

      /* Now try high-order zero bits.  Here we try the shifted-in bits as
	 all zero and all ones.  Be careful to avoid shifting outside the
	 mode and to avoid shifting outside the host wide int size.  */
      /* On narrow hosts, don't shift a 1 into the high bit, since we'll
	 confuse the recursive call and set all of the high 32 bits.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
	      - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c << bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp)
	      {
		new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
				     target, 1, OPTAB_WIDEN);
	      }
	  }

      /* Now try high-order 1 bits.  We get that with a sign-extension.
	 But one bit isn't enough here.  Be careful to avoid shifting outside
	 the mode and to avoid shifting outside the host wide int size.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
	      - floor_log2 (~ c) - 2);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c << bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp)
	      {
		new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
				     target, 0, OPTAB_WIDEN);
	      }
	  }
    }

#if HOST_BITS_PER_WIDE_INT == 64
  /* Finally, see if we can load a value into the target that is the same
     as the constant except that all bytes that are 0 are changed to be
     0xff.  If we can, then we can do a ZAPNOT to obtain the desired
     constant.  */

  new_const = c;
  for (i = 0; i < 64; i += 8)
    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
      new_const |= (HOST_WIDE_INT) 0xff << i;

  /* We are only called for SImode and DImode.  If this is SImode, ensure that
     we are sign extended to a full word.  */

  if (mode == SImode)
    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;

  if (new_const != c)
    {
      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
      if (temp)
	{
	  if (no_output)
	    return temp;
	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
			       target, 0, OPTAB_WIDEN);
	}
    }
#endif

  return 0;
}
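
/* As a sketch of the ZAPNOT case: for c == 0x0000ffffffff0000 the
   zero bytes are filled to give new_const == -1, a one-insn load;
   ANDing that with the zap mask 0x0000ffffffff0000 (a zapnot keeping
   bytes 2..5) then reproduces C.  */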
Returns the place
2004    where the output has been placed if it can be done and the insns have been
2005    emitted.  If it would take more than N insns, zero is returned and no
2006    insns are emitted.  */
2007
2008 static rtx
2009 alpha_emit_set_const (rtx target, machine_mode mode,
2010                       HOST_WIDE_INT c, int n, bool no_output)
2011 {
2012   machine_mode orig_mode = mode;
2013   rtx orig_target = target;
2014   rtx result = 0;
2015   int i;
2016
2017   /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2018      can't load this constant in one insn, do this in DImode.  */
2019   if (!can_create_pseudo_p () && mode == SImode
2020       && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2021     {
2022       result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2023       if (result)
2024         return result;
2025
2026       target = no_output ? NULL : gen_lowpart (DImode, target);
2027       mode = DImode;
2028     }
2029   else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2030     {
2031       target = no_output ? NULL : gen_lowpart (DImode, target);
2032       mode = DImode;
2033     }
2034
2035   /* Try 1 insn, then 2, then up to N.  */
2036   for (i = 1; i <= n; i++)
2037     {
2038       result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2039       if (result)
2040         {
2041           rtx_insn *insn;
2042           rtx set;
2043
2044           if (no_output)
2045             return result;
2046
2047           insn = get_last_insn ();
2048           set = single_set (insn);
2049           if (! CONSTANT_P (SET_SRC (set)))
2050             set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2051           break;
2052         }
2053     }
2054
2055   /* Allow for the case where we changed the mode of TARGET.  */
2056   if (result)
2057     {
2058       if (result == target)
2059         result = orig_target;
2060       else if (mode != orig_mode)
2061         result = gen_lowpart (orig_mode, result);
2062     }
2063
2064   return result;
2065 }
2066
2067 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2068    fall back to a straightforward decomposition.  We do this to avoid
2069    exponential run times encountered when looking for longer sequences
2070    with alpha_emit_set_const.  */
2071
2072 static rtx
2073 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
2074 {
2075   HOST_WIDE_INT d1, d2, d3, d4;
2076
2077   /* Decompose the entire word.  */
2078 #if HOST_BITS_PER_WIDE_INT >= 64
2079   gcc_assert (c2 == -(c1 < 0));
2080   d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2081   c1 -= d1;
2082   d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2083   c1 = (c1 - d2) >> 32;
2084   d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2085   c1 -= d3;
2086   d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2087   gcc_assert (c1 == d4);
2088 #else
2089   d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2090   c1 -= d1;
2091   d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2092   gcc_assert (c1 == d2);
2093   c2 += (d2 < 0);
2094   d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
2095   c2 -= d3;
2096   d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2097   gcc_assert (c2 == d4);
2098 #endif
2099
2100   /* Construct the high word.  */
2101   if (d4)
2102     {
2103       emit_move_insn (target, GEN_INT (d4));
2104       if (d3)
2105         emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2106     }
2107   else
2108     emit_move_insn (target, GEN_INT (d3));
2109
2110   /* Shift it into place.  */
2111   emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2112
2113   /* Add in the low bits.
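
   As a worked illustration of the decomposition above (the constant is
   invented for this note): c = 0x123456789abcdef0 splits into
   d4 = 0x12340000, d3 = 0x5679, d2 = -0x65430000 and d1 = -0x2110, so
   the moves emitted here amount to roughly (with $r standing in for
   the target register):

       ldah $r,0x1234($31)
       lda  $r,0x5679($r)
       sll  $r,32,$r
       ldah $r,-0x6543($r)
       lda  $r,-0x2110($r)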
*/
2114   if (d2)
2115     emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2116   if (d1)
2117     emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2118
2119   return target;
2120 }
2121
2122 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, store
2123    the low 64 bits in *P0 and the high 64 bits in *P1.  */
2124
2125 static void
2126 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2127 {
2128   HOST_WIDE_INT i0, i1;
2129
2130   if (GET_CODE (x) == CONST_VECTOR)
2131     x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2132
2133
2134   if (CONST_INT_P (x))
2135     {
2136       i0 = INTVAL (x);
2137       i1 = -(i0 < 0);
2138     }
2139   else if (HOST_BITS_PER_WIDE_INT >= 64)
2140     {
2141       i0 = CONST_DOUBLE_LOW (x);
2142       i1 = -(i0 < 0);
2143     }
2144   else
2145     {
2146       i0 = CONST_DOUBLE_LOW (x);
2147       i1 = CONST_DOUBLE_HIGH (x);
2148     }
2149
2150   *p0 = i0;
2151   *p1 = i1;
2152 }
2153
2154 /* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for which
2155    we are willing to load the value into a register via a move pattern.
2156    Normally this is all symbolic constants, integral constants that
2157    take three or fewer instructions, and floating-point zero.  */
2158
2159 bool
2160 alpha_legitimate_constant_p (machine_mode mode, rtx x)
2161 {
2162   HOST_WIDE_INT i0, i1;
2163
2164   switch (GET_CODE (x))
2165     {
2166     case LABEL_REF:
2167     case HIGH:
2168       return true;
2169
2170     case CONST:
2171       if (GET_CODE (XEXP (x, 0)) == PLUS
2172           && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2173         x = XEXP (XEXP (x, 0), 0);
2174       else
2175         return true;
2176
2177       if (GET_CODE (x) != SYMBOL_REF)
2178         return true;
2179
2180       /* FALLTHRU */
2181
2182     case SYMBOL_REF:
2183       /* TLS symbols are never valid.  */
2184       return SYMBOL_REF_TLS_MODEL (x) == 0;
2185
2186     case CONST_DOUBLE:
2187       if (x == CONST0_RTX (mode))
2188         return true;
2189       if (FLOAT_MODE_P (mode))
2190         return false;
2191       goto do_integer;
2192
2193     case CONST_VECTOR:
2194       if (x == CONST0_RTX (mode))
2195         return true;
2196       if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2197         return false;
2198       if (GET_MODE_SIZE (mode) != 8)
2199         return false;
2200       goto do_integer;
2201
2202     case CONST_INT:
2203     do_integer:
2204       if (TARGET_BUILD_CONSTANTS)
2205         return true;
2206       alpha_extract_integer (x, &i0, &i1);
2207       if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2208         return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2209       return false;
2210
2211     default:
2212       return false;
2213     }
2214 }
2215
2216 /* Operand 1 is known to be a constant, and should require more than one
2217    instruction to load.  Emit that multi-part load.  */
2218
2219 bool
2220 alpha_split_const_mov (machine_mode mode, rtx *operands)
2221 {
2222   HOST_WIDE_INT i0, i1;
2223   rtx temp = NULL_RTX;
2224
2225   alpha_extract_integer (operands[1], &i0, &i1);
2226
2227   if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2228     temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2229
2230   if (!temp && TARGET_BUILD_CONSTANTS)
2231     temp = alpha_emit_set_long_const (operands[0], i0, i1);
2232
2233   if (temp)
2234     {
2235       if (!rtx_equal_p (operands[0], temp))
2236         emit_move_insn (operands[0], temp);
2237       return true;
2238     }
2239
2240   return false;
2241 }
2242
2243 /* Expand a move instruction; return true if all work is done.
2244    We don't handle non-bwx subword loads here.  */
2245
2246 bool
2247 alpha_expand_mov (machine_mode mode, rtx *operands)
2248 {
2249   rtx tmp;
2250
2251   /* If the output is not a register, the input must be.  */
2252   if (MEM_P (operands[0])
2253       && !
reg_or_0_operand (operands[1], mode)) 2254 operands[1] = force_reg (mode, operands[1]); 2255 2256 /* Allow legitimize_address to perform some simplifications. */ 2257 if (mode == Pmode && symbolic_operand (operands[1], mode)) 2258 { 2259 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); 2260 if (tmp) 2261 { 2262 if (tmp == operands[0]) 2263 return true; 2264 operands[1] = tmp; 2265 return false; 2266 } 2267 } 2268 2269 /* Early out for non-constants and valid constants. */ 2270 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) 2271 return false; 2272 2273 /* Split large integers. */ 2274 if (CONST_INT_P (operands[1]) 2275 || GET_CODE (operands[1]) == CONST_DOUBLE 2276 || GET_CODE (operands[1]) == CONST_VECTOR) 2277 { 2278 if (alpha_split_const_mov (mode, operands)) 2279 return true; 2280 } 2281 2282 /* Otherwise we've nothing left but to drop the thing to memory. */ 2283 tmp = force_const_mem (mode, operands[1]); 2284 2285 if (tmp == NULL_RTX) 2286 return false; 2287 2288 if (reload_in_progress) 2289 { 2290 emit_move_insn (operands[0], XEXP (tmp, 0)); 2291 operands[1] = replace_equiv_address (tmp, operands[0]); 2292 } 2293 else 2294 operands[1] = validize_mem (tmp); 2295 return false; 2296} 2297 2298/* Expand a non-bwx QImode or HImode move instruction; 2299 return true if all work is done. */ 2300 2301bool 2302alpha_expand_mov_nobwx (machine_mode mode, rtx *operands) 2303{ 2304 rtx seq; 2305 2306 /* If the output is not a register, the input must be. */ 2307 if (MEM_P (operands[0])) 2308 operands[1] = force_reg (mode, operands[1]); 2309 2310 /* Handle four memory cases, unaligned and aligned for either the input 2311 or the output. The only case where we can be called during reload is 2312 for aligned loads; all other cases require temporaries. */ 2313 2314 if (any_memory_operand (operands[1], mode)) 2315 { 2316 if (aligned_memory_operand (operands[1], mode)) 2317 { 2318 if (reload_in_progress) 2319 { 2320 if (mode == QImode) 2321 seq = gen_reload_inqi_aligned (operands[0], operands[1]); 2322 else 2323 seq = gen_reload_inhi_aligned (operands[0], operands[1]); 2324 emit_insn (seq); 2325 } 2326 else 2327 { 2328 rtx aligned_mem, bitnum; 2329 rtx scratch = gen_reg_rtx (SImode); 2330 rtx subtarget; 2331 bool copyout; 2332 2333 get_aligned_mem (operands[1], &aligned_mem, &bitnum); 2334 2335 subtarget = operands[0]; 2336 if (REG_P (subtarget)) 2337 subtarget = gen_lowpart (DImode, subtarget), copyout = false; 2338 else 2339 subtarget = gen_reg_rtx (DImode), copyout = true; 2340 2341 if (mode == QImode) 2342 seq = gen_aligned_loadqi (subtarget, aligned_mem, 2343 bitnum, scratch); 2344 else 2345 seq = gen_aligned_loadhi (subtarget, aligned_mem, 2346 bitnum, scratch); 2347 emit_insn (seq); 2348 2349 if (copyout) 2350 emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); 2351 } 2352 } 2353 else 2354 { 2355 /* Don't pass these as parameters since that makes the generated 2356 code depend on parameter evaluation order which will cause 2357 bootstrap failures. 
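
      For instance (a hypothetical call shape, mirroring the code that
      follows): writing

          seq = gen_unaligned_loadqi (subtarget,
                                      get_unaligned_address (operands[1]),
                                      gen_reg_rtx (DImode),
                                      gen_reg_rtx (DImode));

      would leave the order of the two gen_reg_rtx calls, and hence the
      pseudo numbering in the emitted RTL, up to the host compiler.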
*/
2358
2359           rtx temp1, temp2, subtarget, ua;
2360           bool copyout;
2361
2362           temp1 = gen_reg_rtx (DImode);
2363           temp2 = gen_reg_rtx (DImode);
2364
2365           subtarget = operands[0];
2366           if (REG_P (subtarget))
2367             subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2368           else
2369             subtarget = gen_reg_rtx (DImode), copyout = true;
2370
2371           ua = get_unaligned_address (operands[1]);
2372           if (mode == QImode)
2373             seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2374           else
2375             seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2376
2377           alpha_set_memflags (seq, operands[1]);
2378           emit_insn (seq);
2379
2380           if (copyout)
2381             emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2382         }
2383       return true;
2384     }
2385
2386   if (any_memory_operand (operands[0], mode))
2387     {
2388       if (aligned_memory_operand (operands[0], mode))
2389         {
2390           rtx aligned_mem, bitnum;
2391           rtx temp1 = gen_reg_rtx (SImode);
2392           rtx temp2 = gen_reg_rtx (SImode);
2393
2394           get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2395
2396           emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2397                                         temp1, temp2));
2398         }
2399       else
2400         {
2401           rtx temp1 = gen_reg_rtx (DImode);
2402           rtx temp2 = gen_reg_rtx (DImode);
2403           rtx temp3 = gen_reg_rtx (DImode);
2404           rtx ua = get_unaligned_address (operands[0]);
2405
2406           if (mode == QImode)
2407             seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2408           else
2409             seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2410
2411           alpha_set_memflags (seq, operands[0]);
2412           emit_insn (seq);
2413         }
2414       return true;
2415     }
2416
2417   return false;
2418 }
2419
2420 /* Implement the movmisalign patterns.  One of the operands is a memory
2421    that is not naturally aligned.  Emit instructions to load or store it.  */
2422
2423 void
2424 alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2425 {
2426   /* Honor misaligned loads for those cases where we promised to do so.  */
2427   if (MEM_P (operands[1]))
2428     {
2429       rtx tmp;
2430
2431       if (register_operand (operands[0], mode))
2432         tmp = operands[0];
2433       else
2434         tmp = gen_reg_rtx (mode);
2435
2436       alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2437       if (tmp != operands[0])
2438         emit_move_insn (operands[0], tmp);
2439     }
2440   else if (MEM_P (operands[0]))
2441     {
2442       if (!reg_or_0_operand (operands[1], mode))
2443         operands[1] = force_reg (mode, operands[1]);
2444       alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2445     }
2446   else
2447     gcc_unreachable ();
2448 }
2449
2450 /* Generate an unsigned DImode to FP conversion.  This is the same code
2451    optabs would emit if we didn't have TFmode patterns.
2452
2453    For SFmode, this is the only construction I've found that can pass
2454    gcc.c-torture/execute/ieee/rbug.c.  No scenario that uses DFmode
2455    intermediates will work, because you'll get intermediate rounding
2456    that ruins the end result.  Some of this could be fixed by turning
2457    on round-to-positive-infinity, but that requires diddling the fpsr,
2458    which kills performance.  I tried turning this around and converting
2459    to a negative number, so that I could turn on /m, but either I did
2460    it wrong or there's something else going on, because I wound up with
2461    the exact same single-bit error.
There is a branch-less form of this same code: 2462 2463 srl $16,1,$1 2464 and $16,1,$2 2465 cmplt $16,0,$3 2466 or $1,$2,$2 2467 cmovge $16,$16,$2 2468 itoft $3,$f10 2469 itoft $2,$f11 2470 cvtqs $f11,$f11 2471 adds $f11,$f11,$f0 2472 fcmoveq $f10,$f11,$f0 2473 2474 I'm not using it because it's the same number of instructions as 2475 this branch-full form, and it has more serialized long latency 2476 instructions on the critical path. 2477 2478 For DFmode, we can avoid rounding errors by breaking up the word 2479 into two pieces, converting them separately, and adding them back: 2480 2481 LC0: .long 0,0x5f800000 2482 2483 itoft $16,$f11 2484 lda $2,LC0 2485 cmplt $16,0,$1 2486 cpyse $f11,$f31,$f10 2487 cpyse $f31,$f11,$f11 2488 s4addq $1,$2,$1 2489 lds $f12,0($1) 2490 cvtqt $f10,$f10 2491 cvtqt $f11,$f11 2492 addt $f12,$f10,$f0 2493 addt $f0,$f11,$f0 2494 2495 This doesn't seem to be a clear-cut win over the optabs form. 2496 It probably all depends on the distribution of numbers being 2497 converted -- in the optabs form, all but high-bit-set has a 2498 much lower minimum execution time. */ 2499 2500void 2501alpha_emit_floatuns (rtx operands[2]) 2502{ 2503 rtx neglab, donelab, i0, i1, f0, in, out; 2504 machine_mode mode; 2505 2506 out = operands[0]; 2507 in = force_reg (DImode, operands[1]); 2508 mode = GET_MODE (out); 2509 neglab = gen_label_rtx (); 2510 donelab = gen_label_rtx (); 2511 i0 = gen_reg_rtx (DImode); 2512 i1 = gen_reg_rtx (DImode); 2513 f0 = gen_reg_rtx (mode); 2514 2515 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 2516 2517 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 2518 emit_jump_insn (gen_jump (donelab)); 2519 emit_barrier (); 2520 2521 emit_label (neglab); 2522 2523 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 2524 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 2525 emit_insn (gen_iordi3 (i0, i0, i1)); 2526 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); 2527 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 2528 2529 emit_label (donelab); 2530} 2531 2532/* Generate the comparison for a conditional branch. */ 2533 2534void 2535alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode) 2536{ 2537 enum rtx_code cmp_code, branch_code; 2538 machine_mode branch_mode = VOIDmode; 2539 enum rtx_code code = GET_CODE (operands[0]); 2540 rtx op0 = operands[1], op1 = operands[2]; 2541 rtx tem; 2542 2543 if (cmp_mode == TFmode) 2544 { 2545 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2546 op1 = const0_rtx; 2547 cmp_mode = DImode; 2548 } 2549 2550 /* The general case: fold the comparison code to the types of compares 2551 that we have, choosing the branch as necessary. */ 2552 switch (code) 2553 { 2554 case EQ: case LE: case LT: case LEU: case LTU: 2555 case UNORDERED: 2556 /* We have these compares. */ 2557 cmp_code = code, branch_code = NE; 2558 break; 2559 2560 case NE: 2561 case ORDERED: 2562 /* These must be reversed. */ 2563 cmp_code = reverse_condition (code), branch_code = EQ; 2564 break; 2565 2566 case GE: case GT: case GEU: case GTU: 2567 /* For FP, we swap them, for INT, we reverse them. 
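
         E.g. for GT a,b (an illustration of the mapping, not an extra
         case): the FP path performs the compare as LT b,a (a cmptlt)
         and branches on the result being nonzero (NE), while the
         integer path performs LE a,b (a cmple) and branches on the
         result being zero (EQ).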
*/
2568       if (cmp_mode == DFmode)
2569         {
2570           cmp_code = swap_condition (code);
2571           branch_code = NE;
2572           std::swap (op0, op1);
2573         }
2574       else
2575         {
2576           cmp_code = reverse_condition (code);
2577           branch_code = EQ;
2578         }
2579       break;
2580
2581     default:
2582       gcc_unreachable ();
2583     }
2584
2585   if (cmp_mode == DFmode)
2586     {
2587       if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2588         {
2589           /* When we are not as concerned about non-finite values, and we
2590              are comparing against zero, we can branch directly.  */
2591           if (op1 == CONST0_RTX (DFmode))
2592             cmp_code = UNKNOWN, branch_code = code;
2593           else if (op0 == CONST0_RTX (DFmode))
2594             {
2595               /* Undo the swap we probably did just above.  */
2596               std::swap (op0, op1);
2597               branch_code = swap_condition (cmp_code);
2598               cmp_code = UNKNOWN;
2599             }
2600         }
2601       else
2602         {
2603           /* ??? We mark the branch mode to be CCmode to prevent the
2604              compare and branch from being combined, since the compare
2605              insn follows IEEE rules that the branch does not.  */
2606           branch_mode = CCmode;
2607         }
2608     }
2609   else
2610     {
2611       /* The following optimizations are only for signed compares.  */
2612       if (code != LEU && code != LTU && code != GEU && code != GTU)
2613         {
2614           /* Whee.  Compare and branch against 0 directly.  */
2615           if (op1 == const0_rtx)
2616             cmp_code = UNKNOWN, branch_code = code;
2617
2618           /* If the constant doesn't fit into an immediate, but can
2619              be generated by lda/ldah, we adjust the argument and
2620              compare against zero, so we can use beq/bne directly.  */
2621           /* ??? Don't do this when comparing against symbols, otherwise
2622              we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2623              be declared false out of hand (at least for non-weak).  */
2624           else if (CONST_INT_P (op1)
2625                    && (code == EQ || code == NE)
2626                    && !(symbolic_operand (op0, VOIDmode)
2627                         || (REG_P (op0) && REG_POINTER (op0))))
2628             {
2629               rtx n_op1 = GEN_INT (-INTVAL (op1));
2630
2631               if (! satisfies_constraint_I (op1)
2632                   && (satisfies_constraint_K (n_op1)
2633                       || satisfies_constraint_L (n_op1)))
2634                 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2635             }
2636         }
2637
2638       if (!reg_or_0_operand (op0, DImode))
2639         op0 = force_reg (DImode, op0);
2640       if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2641         op1 = force_reg (DImode, op1);
2642     }
2643
2644   /* Emit an initial compare instruction, if necessary.  */
2645   tem = op0;
2646   if (cmp_code != UNKNOWN)
2647     {
2648       tem = gen_reg_rtx (cmp_mode);
2649       emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2650     }
2651
2652   /* Emit the branch instruction.  */
2653   tem = gen_rtx_SET (VOIDmode, pc_rtx,
2654                      gen_rtx_IF_THEN_ELSE (VOIDmode,
2655                                            gen_rtx_fmt_ee (branch_code,
2656                                                            branch_mode, tem,
2657                                                            CONST0_RTX (cmp_mode)),
2658                                            gen_rtx_LABEL_REF (VOIDmode,
2659                                                               operands[3]),
2660                                            pc_rtx));
2661   emit_jump_insn (tem);
2662 }
2663
2664 /* Certain simplifications can be done to make invalid setcc operations
2665    valid.  Return true on success, or false if we can't work.
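
   E.g. (illustrative): a DImode GE a,b is rewritten as LE b,a, which a
   cmple implements directly, while a DFmode GE a,b becomes cmptle b,a
   followed by a NE test of that result against zero.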
*/ 2666 2667bool 2668alpha_emit_setcc (rtx operands[], machine_mode cmp_mode) 2669{ 2670 enum rtx_code cmp_code; 2671 enum rtx_code code = GET_CODE (operands[1]); 2672 rtx op0 = operands[2], op1 = operands[3]; 2673 rtx tmp; 2674 2675 if (cmp_mode == TFmode) 2676 { 2677 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2678 op1 = const0_rtx; 2679 cmp_mode = DImode; 2680 } 2681 2682 if (cmp_mode == DFmode && !TARGET_FIX) 2683 return 0; 2684 2685 /* The general case: fold the comparison code to the types of compares 2686 that we have, choosing the branch as necessary. */ 2687 2688 cmp_code = UNKNOWN; 2689 switch (code) 2690 { 2691 case EQ: case LE: case LT: case LEU: case LTU: 2692 case UNORDERED: 2693 /* We have these compares. */ 2694 if (cmp_mode == DFmode) 2695 cmp_code = code, code = NE; 2696 break; 2697 2698 case NE: 2699 if (cmp_mode == DImode && op1 == const0_rtx) 2700 break; 2701 /* FALLTHRU */ 2702 2703 case ORDERED: 2704 cmp_code = reverse_condition (code); 2705 code = EQ; 2706 break; 2707 2708 case GE: case GT: case GEU: case GTU: 2709 /* These normally need swapping, but for integer zero we have 2710 special patterns that recognize swapped operands. */ 2711 if (cmp_mode == DImode && op1 == const0_rtx) 2712 break; 2713 code = swap_condition (code); 2714 if (cmp_mode == DFmode) 2715 cmp_code = code, code = NE; 2716 std::swap (op0, op1); 2717 break; 2718 2719 default: 2720 gcc_unreachable (); 2721 } 2722 2723 if (cmp_mode == DImode) 2724 { 2725 if (!register_operand (op0, DImode)) 2726 op0 = force_reg (DImode, op0); 2727 if (!reg_or_8bit_operand (op1, DImode)) 2728 op1 = force_reg (DImode, op1); 2729 } 2730 2731 /* Emit an initial compare instruction, if necessary. */ 2732 if (cmp_code != UNKNOWN) 2733 { 2734 tmp = gen_reg_rtx (cmp_mode); 2735 emit_insn (gen_rtx_SET (VOIDmode, tmp, 2736 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); 2737 2738 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; 2739 op1 = const0_rtx; 2740 } 2741 2742 /* Emit the setcc instruction. */ 2743 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 2744 gen_rtx_fmt_ee (code, DImode, op0, op1))); 2745 return true; 2746} 2747 2748 2749/* Rewrite a comparison against zero CMP of the form 2750 (CODE (cc0) (const_int 0)) so it can be written validly in 2751 a conditional move (if_then_else CMP ...). 2752 If both of the operands that set cc0 are nonzero we must emit 2753 an insn to perform the compare (it can't be done within 2754 the conditional move). */ 2755 2756rtx 2757alpha_emit_conditional_move (rtx cmp, machine_mode mode) 2758{ 2759 enum rtx_code code = GET_CODE (cmp); 2760 enum rtx_code cmov_code = NE; 2761 rtx op0 = XEXP (cmp, 0); 2762 rtx op1 = XEXP (cmp, 1); 2763 machine_mode cmp_mode 2764 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); 2765 machine_mode cmov_mode = VOIDmode; 2766 int local_fast_math = flag_unsafe_math_optimizations; 2767 rtx tem; 2768 2769 if (cmp_mode == TFmode) 2770 { 2771 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2772 op1 = const0_rtx; 2773 cmp_mode = DImode; 2774 } 2775 2776 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); 2777 2778 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) 2779 { 2780 enum rtx_code cmp_code; 2781 2782 if (! TARGET_FIX) 2783 return 0; 2784 2785 /* If we have fp<->int register move instructions, do a cmov by 2786 performing the comparison in fp registers, and move the 2787 zero/nonzero value to integer registers, where we can then 2788 use a normal cmov, or vice-versa. 
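
      E.g. (a sketch): to cmov integer data on a DFmode equality, the
      code below emits cmpteq a,b into an FP register, moves the
      0.0-or-2.0 result over to an integer register (cheap thanks to
      TARGET_FIX's ftoit), and then uses an ordinary integer cmov
      against zero.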
*/ 2789 2790 switch (code) 2791 { 2792 case EQ: case LE: case LT: case LEU: case LTU: 2793 case UNORDERED: 2794 /* We have these compares. */ 2795 cmp_code = code, code = NE; 2796 break; 2797 2798 case NE: 2799 case ORDERED: 2800 /* These must be reversed. */ 2801 cmp_code = reverse_condition (code), code = EQ; 2802 break; 2803 2804 case GE: case GT: case GEU: case GTU: 2805 /* These normally need swapping, but for integer zero we have 2806 special patterns that recognize swapped operands. */ 2807 if (cmp_mode == DImode && op1 == const0_rtx) 2808 cmp_code = code, code = NE; 2809 else 2810 { 2811 cmp_code = swap_condition (code); 2812 code = NE; 2813 std::swap (op0, op1); 2814 } 2815 break; 2816 2817 default: 2818 gcc_unreachable (); 2819 } 2820 2821 if (cmp_mode == DImode) 2822 { 2823 if (!reg_or_0_operand (op0, DImode)) 2824 op0 = force_reg (DImode, op0); 2825 if (!reg_or_8bit_operand (op1, DImode)) 2826 op1 = force_reg (DImode, op1); 2827 } 2828 2829 tem = gen_reg_rtx (cmp_mode); 2830 emit_insn (gen_rtx_SET (VOIDmode, tem, 2831 gen_rtx_fmt_ee (cmp_code, cmp_mode, 2832 op0, op1))); 2833 2834 cmp_mode = cmp_mode == DImode ? DFmode : DImode; 2835 op0 = gen_lowpart (cmp_mode, tem); 2836 op1 = CONST0_RTX (cmp_mode); 2837 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 2838 local_fast_math = 1; 2839 } 2840 2841 if (cmp_mode == DImode) 2842 { 2843 if (!reg_or_0_operand (op0, DImode)) 2844 op0 = force_reg (DImode, op0); 2845 if (!reg_or_8bit_operand (op1, DImode)) 2846 op1 = force_reg (DImode, op1); 2847 } 2848 2849 /* We may be able to use a conditional move directly. 2850 This avoids emitting spurious compares. */ 2851 if (signed_comparison_operator (cmp, VOIDmode) 2852 && (cmp_mode == DImode || local_fast_math) 2853 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) 2854 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 2855 2856 /* We can't put the comparison inside the conditional move; 2857 emit a compare instruction and put that inside the 2858 conditional move. Make sure we emit only comparisons we have; 2859 swap or reverse as necessary. */ 2860 2861 if (!can_create_pseudo_p ()) 2862 return NULL_RTX; 2863 2864 switch (code) 2865 { 2866 case EQ: case LE: case LT: case LEU: case LTU: 2867 case UNORDERED: 2868 /* We have these compares: */ 2869 break; 2870 2871 case NE: 2872 case ORDERED: 2873 /* These must be reversed. */ 2874 code = reverse_condition (code); 2875 cmov_code = EQ; 2876 break; 2877 2878 case GE: case GT: case GEU: case GTU: 2879 /* These normally need swapping, but for integer zero we have 2880 special patterns that recognize swapped operands. */ 2881 if (cmp_mode == DImode && op1 == const0_rtx) 2882 break; 2883 code = swap_condition (code); 2884 std::swap (op0, op1); 2885 break; 2886 2887 default: 2888 gcc_unreachable (); 2889 } 2890 2891 if (cmp_mode == DImode) 2892 { 2893 if (!reg_or_0_operand (op0, DImode)) 2894 op0 = force_reg (DImode, op0); 2895 if (!reg_or_8bit_operand (op1, DImode)) 2896 op1 = force_reg (DImode, op1); 2897 } 2898 2899 /* ??? We mark the branch mode to be CCmode to prevent the compare 2900 and cmov from being combined, since the compare insn follows IEEE 2901 rules that the cmov does not. 
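
      (Illustrative consequence: with unsafe math optimizations the
      DFmode compare may fold straight into the conditional move, but
      under strict IEEE semantics the compare insn has NaN behavior
      that the cmov pattern does not express, so CCmode keeps combine
      from merging the two.)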
*/
2902   if (cmp_mode == DFmode && !local_fast_math)
2903     cmov_mode = CCmode;
2904
2905   tem = gen_reg_rtx (cmp_mode);
2906   emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2907   return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2908 }
2909
2910 /* Simplify a conditional move of two constants into a setcc with
2911    arithmetic.  This is done with a splitter since combine would
2912    just undo the work if done during code generation.  It also catches
2913    cases we wouldn't have before cse.  */
2914
2915 int
2916 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2917                               rtx t_rtx, rtx f_rtx)
2918 {
2919   HOST_WIDE_INT t, f, diff;
2920   machine_mode mode;
2921   rtx target, subtarget, tmp;
2922
2923   mode = GET_MODE (dest);
2924   t = INTVAL (t_rtx);
2925   f = INTVAL (f_rtx);
2926   diff = t - f;
2927
2928   if (((code == NE || code == EQ) && diff < 0)
2929       || (code == GE || code == GT))
2930     {
2931       code = reverse_condition (code);
2932       diff = t, t = f, f = diff;
2933       diff = t - f;
2934     }
2935
2936   subtarget = target = dest;
2937   if (mode != DImode)
2938     {
2939       target = gen_lowpart (DImode, dest);
2940       if (can_create_pseudo_p ())
2941         subtarget = gen_reg_rtx (DImode);
2942       else
2943         subtarget = target;
2944     }
2945   /* Below, we must be careful to use copy_rtx on target and subtarget
2946      in intermediate insns, as they may be a subreg rtx, which may not
2947      be shared.  */
2948
2949   if (f == 0 && exact_log2 (diff) > 0
2950       /* On EV6, we've got enough shifters to make non-arithmetic shifts
2951          viable over a longer latency cmove.  On EV5, the E0 slot is a
2952          scarce resource, and on EV4 shift has the same latency as a cmove.  */
2953       && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2954     {
2955       tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2956       emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2957
2958       tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2959                             GEN_INT (exact_log2 (t)));
2960       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2961     }
2962   else if (f == 0 && t == -1)
2963     {
2964       tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2965       emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2966
2967       emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2968     }
2969   else if (diff == 1 || diff == 4 || diff == 8)
2970     {
2971       rtx add_op;
2972
2973       tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2974       emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2975
2976       if (diff == 1)
2977         emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2978       else
2979         {
2980           add_op = GEN_INT (f);
2981           if (sext_add_operand (add_op, mode))
2982             {
2983               tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2984                                   GEN_INT (diff));
2985               tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2986               emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2987             }
2988           else
2989             return 0;
2990         }
2991     }
2992   else
2993     return 0;
2994
2995   return 1;
2996 }
2997
2998 /* Look up the X_floating library function name for the
2999    given operation.
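
   For example (reading the table below): looking up PLUS resolves to
   "_OtsAddX" on OSF/Tru64 and "OTS$ADD_X" on VMS; the libcall rtx is
   created by init_one_libfunc on first use and cached in the libcall
   field thereafter.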
*/ 3000 3001struct GTY(()) xfloating_op 3002{ 3003 const enum rtx_code code; 3004 const char *const GTY((skip)) osf_func; 3005 const char *const GTY((skip)) vms_func; 3006 rtx libcall; 3007}; 3008 3009static GTY(()) struct xfloating_op xfloating_ops[] = 3010{ 3011 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, 3012 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, 3013 { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, 3014 { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, 3015 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, 3016 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, 3017 { LT, "_OtsLssX", "OTS$LSS_X", 0 }, 3018 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, 3019 { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, 3020 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, 3021 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, 3022 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, 3023 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, 3024 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, 3025 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } 3026}; 3027 3028static GTY(()) struct xfloating_op vax_cvt_ops[] = 3029{ 3030 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, 3031 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } 3032}; 3033 3034static rtx 3035alpha_lookup_xfloating_lib_func (enum rtx_code code) 3036{ 3037 struct xfloating_op *ops = xfloating_ops; 3038 long n = ARRAY_SIZE (xfloating_ops); 3039 long i; 3040 3041 gcc_assert (TARGET_HAS_XFLOATING_LIBS); 3042 3043 /* How irritating. Nothing to key off for the main table. */ 3044 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) 3045 { 3046 ops = vax_cvt_ops; 3047 n = ARRAY_SIZE (vax_cvt_ops); 3048 } 3049 3050 for (i = 0; i < n; ++i, ++ops) 3051 if (ops->code == code) 3052 { 3053 rtx func = ops->libcall; 3054 if (!func) 3055 { 3056 func = init_one_libfunc (TARGET_ABI_OPEN_VMS 3057 ? ops->vms_func : ops->osf_func); 3058 ops->libcall = func; 3059 } 3060 return func; 3061 } 3062 3063 gcc_unreachable (); 3064} 3065 3066/* Most X_floating operations take the rounding mode as an argument. 3067 Compute that here. */ 3068 3069static int 3070alpha_compute_xfloating_mode_arg (enum rtx_code code, 3071 enum alpha_fp_rounding_mode round) 3072{ 3073 int mode; 3074 3075 switch (round) 3076 { 3077 case ALPHA_FPRM_NORM: 3078 mode = 2; 3079 break; 3080 case ALPHA_FPRM_MINF: 3081 mode = 1; 3082 break; 3083 case ALPHA_FPRM_CHOP: 3084 mode = 0; 3085 break; 3086 case ALPHA_FPRM_DYN: 3087 mode = 4; 3088 break; 3089 default: 3090 gcc_unreachable (); 3091 3092 /* XXX For reference, round to +inf is mode = 3. */ 3093 } 3094 3095 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) 3096 mode |= 0x10000; 3097 3098 return mode; 3099} 3100 3101/* Emit an X_floating library function call. 3102 3103 Note that these functions do not follow normal calling conventions: 3104 TFmode arguments are passed in two integer registers (as opposed to 3105 indirect); TFmode return values appear in R16+R17. 3106 3107 FUNC is the function to call. 3108 TARGET is where the output belongs. 3109 OPERANDS are the inputs. 3110 NOPERANDS is the count of inputs. 3111 EQUIV is the expression equivalent for the function. 
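
   For example (an illustration of the convention above, for a binary
   TFmode operation such as _OtsAddX): the first TFmode argument goes
   in $16-$17, the second in $18-$19, the DImode rounding-mode argument
   in $20, and the TFmode result comes back in $16-$17.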
3112*/ 3113 3114static void 3115alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], 3116 int noperands, rtx equiv) 3117{ 3118 rtx usage = NULL_RTX, tmp, reg; 3119 int regno = 16, i; 3120 3121 start_sequence (); 3122 3123 for (i = 0; i < noperands; ++i) 3124 { 3125 switch (GET_MODE (operands[i])) 3126 { 3127 case TFmode: 3128 reg = gen_rtx_REG (TFmode, regno); 3129 regno += 2; 3130 break; 3131 3132 case DFmode: 3133 reg = gen_rtx_REG (DFmode, regno + 32); 3134 regno += 1; 3135 break; 3136 3137 case VOIDmode: 3138 gcc_assert (CONST_INT_P (operands[i])); 3139 /* FALLTHRU */ 3140 case DImode: 3141 reg = gen_rtx_REG (DImode, regno); 3142 regno += 1; 3143 break; 3144 3145 default: 3146 gcc_unreachable (); 3147 } 3148 3149 emit_move_insn (reg, operands[i]); 3150 use_reg (&usage, reg); 3151 } 3152 3153 switch (GET_MODE (target)) 3154 { 3155 case TFmode: 3156 reg = gen_rtx_REG (TFmode, 16); 3157 break; 3158 case DFmode: 3159 reg = gen_rtx_REG (DFmode, 32); 3160 break; 3161 case DImode: 3162 reg = gen_rtx_REG (DImode, 0); 3163 break; 3164 default: 3165 gcc_unreachable (); 3166 } 3167 3168 tmp = gen_rtx_MEM (QImode, func); 3169 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx, 3170 const0_rtx, const0_rtx)); 3171 CALL_INSN_FUNCTION_USAGE (tmp) = usage; 3172 RTL_CONST_CALL_P (tmp) = 1; 3173 3174 tmp = get_insns (); 3175 end_sequence (); 3176 3177 emit_libcall_block (tmp, target, reg, equiv); 3178} 3179 3180/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ 3181 3182void 3183alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[]) 3184{ 3185 rtx func; 3186 int mode; 3187 rtx out_operands[3]; 3188 3189 func = alpha_lookup_xfloating_lib_func (code); 3190 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); 3191 3192 out_operands[0] = operands[1]; 3193 out_operands[1] = operands[2]; 3194 out_operands[2] = GEN_INT (mode); 3195 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3, 3196 gen_rtx_fmt_ee (code, TFmode, operands[1], 3197 operands[2])); 3198} 3199 3200/* Emit an X_floating library function call for a comparison. */ 3201 3202static rtx 3203alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) 3204{ 3205 enum rtx_code cmp_code, res_code; 3206 rtx func, out, operands[2], note; 3207 3208 /* X_floating library comparison functions return 3209 -1 unordered 3210 0 false 3211 1 true 3212 Convert the compare against the raw return value. */ 3213 3214 cmp_code = *pcode; 3215 switch (cmp_code) 3216 { 3217 case UNORDERED: 3218 cmp_code = EQ; 3219 res_code = LT; 3220 break; 3221 case ORDERED: 3222 cmp_code = EQ; 3223 res_code = GE; 3224 break; 3225 case NE: 3226 res_code = NE; 3227 break; 3228 case EQ: 3229 case LT: 3230 case GT: 3231 case LE: 3232 case GE: 3233 res_code = GT; 3234 break; 3235 default: 3236 gcc_unreachable (); 3237 } 3238 *pcode = res_code; 3239 3240 func = alpha_lookup_xfloating_lib_func (cmp_code); 3241 3242 operands[0] = op0; 3243 operands[1] = op1; 3244 out = gen_reg_rtx (DImode); 3245 3246 /* What's actually returned is -1,0,1, not a proper boolean value. */ 3247 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); 3248 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); 3249 alpha_emit_xfloating_libcall (func, out, operands, 2, note); 3250 3251 return out; 3252} 3253 3254/* Emit an X_floating library function call for a conversion. 
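
   (E.g., as arranged below: FIX always passes the chopped rounding
   mode ALPHA_FPRM_CHOP, since fix_trunc semantics require truncation
   toward zero, while FLOAT_TRUNCATE passes the user-selected
   alpha_fprm.)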
*/ 3255 3256void 3257alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) 3258{ 3259 int noperands = 1, mode; 3260 rtx out_operands[2]; 3261 rtx func; 3262 enum rtx_code code = orig_code; 3263 3264 if (code == UNSIGNED_FIX) 3265 code = FIX; 3266 3267 func = alpha_lookup_xfloating_lib_func (code); 3268 3269 out_operands[0] = operands[1]; 3270 3271 switch (code) 3272 { 3273 case FIX: 3274 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP); 3275 out_operands[1] = GEN_INT (mode); 3276 noperands = 2; 3277 break; 3278 case FLOAT_TRUNCATE: 3279 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); 3280 out_operands[1] = GEN_INT (mode); 3281 noperands = 2; 3282 break; 3283 default: 3284 break; 3285 } 3286 3287 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands, 3288 gen_rtx_fmt_e (orig_code, 3289 GET_MODE (operands[0]), 3290 operands[1])); 3291} 3292 3293/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of 3294 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, 3295 guarantee that the sequence 3296 set (OP[0] OP[2]) 3297 set (OP[1] OP[3]) 3298 is valid. Naturally, output operand ordering is little-endian. 3299 This is used by *movtf_internal and *movti_internal. */ 3300 3301void 3302alpha_split_tmode_pair (rtx operands[4], machine_mode mode, 3303 bool fixup_overlap) 3304{ 3305 switch (GET_CODE (operands[1])) 3306 { 3307 case REG: 3308 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); 3309 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); 3310 break; 3311 3312 case MEM: 3313 operands[3] = adjust_address (operands[1], DImode, 8); 3314 operands[2] = adjust_address (operands[1], DImode, 0); 3315 break; 3316 3317 case CONST_INT: 3318 case CONST_DOUBLE: 3319 gcc_assert (operands[1] == CONST0_RTX (mode)); 3320 operands[2] = operands[3] = const0_rtx; 3321 break; 3322 3323 default: 3324 gcc_unreachable (); 3325 } 3326 3327 switch (GET_CODE (operands[0])) 3328 { 3329 case REG: 3330 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); 3331 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); 3332 break; 3333 3334 case MEM: 3335 operands[1] = adjust_address (operands[0], DImode, 8); 3336 operands[0] = adjust_address (operands[0], DImode, 0); 3337 break; 3338 3339 default: 3340 gcc_unreachable (); 3341 } 3342 3343 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) 3344 { 3345 std::swap (operands[0], operands[1]); 3346 std::swap (operands[2], operands[3]); 3347 } 3348} 3349 3350/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, 3351 op2 is a register containing the sign bit, operation is the 3352 logical operation to be performed. */ 3353 3354void 3355alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) 3356{ 3357 rtx high_bit = operands[2]; 3358 rtx scratch; 3359 int move; 3360 3361 alpha_split_tmode_pair (operands, TFmode, false); 3362 3363 /* Detect three flavors of operand overlap. */ 3364 move = 1; 3365 if (rtx_equal_p (operands[0], operands[2])) 3366 move = 0; 3367 else if (rtx_equal_p (operands[1], operands[2])) 3368 { 3369 if (rtx_equal_p (operands[0], high_bit)) 3370 move = 2; 3371 else 3372 move = -1; 3373 } 3374 3375 if (move < 0) 3376 emit_move_insn (operands[0], operands[2]); 3377 3378 /* ??? If the destination overlaps both source tf and high_bit, then 3379 assume source tf is dead in its entirety and use the other half 3380 for a scratch register. Otherwise "scratch" is just the proper 3381 destination register. 
*/
3382   scratch = operands[move < 2 ? 1 : 3];
3383
3384   emit_insn ((*operation) (scratch, high_bit, operands[3]));
3385
3386   if (move > 0)
3387     {
3388       emit_move_insn (operands[0], operands[2]);
3389       if (move > 1)
3390         emit_move_insn (operands[1], scratch);
3391     }
3392 }
3393
3394 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3395    unaligned data:
3396
3397            unsigned:                      signed:
3398    word:   ldq_u  r1,X(r11)               ldq_u  r1,X(r11)
3399            ldq_u  r2,X+1(r11)             ldq_u  r2,X+1(r11)
3400            lda    r3,X(r11)               lda    r3,X+2(r11)
3401            extwl  r1,r3,r1                extql  r1,r3,r1
3402            extwh  r2,r3,r2                extqh  r2,r3,r2
3403            or     r1,r2,r1                or     r1,r2,r1
3404                                           sra    r1,48,r1
3405
3406    long:   ldq_u  r1,X(r11)               ldq_u  r1,X(r11)
3407            ldq_u  r2,X+3(r11)             ldq_u  r2,X+3(r11)
3408            lda    r3,X(r11)               lda    r3,X(r11)
3409            extll  r1,r3,r1                extll  r1,r3,r1
3410            extlh  r2,r3,r2                extlh  r2,r3,r2
3411            or     r1,r2,r1                addl   r1,r2,r1
3412
3413    quad:   ldq_u  r1,X(r11)
3414            ldq_u  r2,X+7(r11)
3415            lda    r3,X(r11)
3416            extql  r1,r3,r1
3417            extqh  r2,r3,r2
3418            or     r1,r2,r1
3419 */
3420
3421 void
3422 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3423                              HOST_WIDE_INT ofs, int sign)
3424 {
3425   rtx meml, memh, addr, extl, exth, tmp, mema;
3426   machine_mode mode;
3427
3428   if (TARGET_BWX && size == 2)
3429     {
3430       meml = adjust_address (mem, QImode, ofs);
3431       memh = adjust_address (mem, QImode, ofs+1);
3432       extl = gen_reg_rtx (DImode);
3433       exth = gen_reg_rtx (DImode);
3434       emit_insn (gen_zero_extendqidi2 (extl, meml));
3435       emit_insn (gen_zero_extendqidi2 (exth, memh));
3436       exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3437                                   NULL, 1, OPTAB_LIB_WIDEN);
3438       addr = expand_simple_binop (DImode, IOR, extl, exth,
3439                                   NULL, 1, OPTAB_LIB_WIDEN);
3440
3441       if (sign && GET_MODE (tgt) != HImode)
3442         {
3443           addr = gen_lowpart (HImode, addr);
3444           emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3445         }
3446       else
3447         {
3448           if (GET_MODE (tgt) != DImode)
3449             addr = gen_lowpart (GET_MODE (tgt), addr);
3450           emit_move_insn (tgt, addr);
3451         }
3452       return;
3453     }
3454
3455   meml = gen_reg_rtx (DImode);
3456   memh = gen_reg_rtx (DImode);
3457   addr = gen_reg_rtx (DImode);
3458   extl = gen_reg_rtx (DImode);
3459   exth = gen_reg_rtx (DImode);
3460
3461   mema = XEXP (mem, 0);
3462   if (GET_CODE (mema) == LO_SUM)
3463     mema = force_reg (Pmode, mema);
3464
3465   /* AND addresses cannot be in any alias set, since they may implicitly
3466      alias surrounding code.  Ideally we'd have some alias set that
3467      covered all types except those with alignment 8 or higher.  */
3468
3469   tmp = change_address (mem, DImode,
3470                         gen_rtx_AND (DImode,
3471                                      plus_constant (DImode, mema, ofs),
3472                                      GEN_INT (-8)));
3473   set_mem_alias_set (tmp, 0);
3474   emit_move_insn (meml, tmp);
3475
3476   tmp = change_address (mem, DImode,
3477                         gen_rtx_AND (DImode,
3478                                      plus_constant (DImode, mema,
3479                                                     ofs + size - 1),
3480                                      GEN_INT (-8)));
3481   set_mem_alias_set (tmp, 0);
3482   emit_move_insn (memh, tmp);
3483
3484   if (sign && size == 2)
3485     {
3486       emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3487
3488       emit_insn (gen_extql (extl, meml, addr));
3489       emit_insn (gen_extqh (exth, memh, addr));
3490
3491       /* We must use tgt here for the target.  Alpha-vms port fails if we use
3492          addr for the target, because addr is marked as a pointer and combine
3493          knows that pointers are always sign-extended 32-bit values.
*/ 3494 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); 3495 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48), 3496 addr, 1, OPTAB_WIDEN); 3497 } 3498 else 3499 { 3500 emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); 3501 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr)); 3502 switch ((int) size) 3503 { 3504 case 2: 3505 emit_insn (gen_extwh (exth, memh, addr)); 3506 mode = HImode; 3507 break; 3508 case 4: 3509 emit_insn (gen_extlh (exth, memh, addr)); 3510 mode = SImode; 3511 break; 3512 case 8: 3513 emit_insn (gen_extqh (exth, memh, addr)); 3514 mode = DImode; 3515 break; 3516 default: 3517 gcc_unreachable (); 3518 } 3519 3520 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), 3521 gen_lowpart (mode, exth), gen_lowpart (mode, tgt), 3522 sign, OPTAB_WIDEN); 3523 } 3524 3525 if (addr != tgt) 3526 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); 3527} 3528 3529/* Similarly, use ins and msk instructions to perform unaligned stores. */ 3530 3531void 3532alpha_expand_unaligned_store (rtx dst, rtx src, 3533 HOST_WIDE_INT size, HOST_WIDE_INT ofs) 3534{ 3535 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; 3536 3537 if (TARGET_BWX && size == 2) 3538 { 3539 if (src != const0_rtx) 3540 { 3541 dstl = gen_lowpart (QImode, src); 3542 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), 3543 NULL, 1, OPTAB_LIB_WIDEN); 3544 dsth = gen_lowpart (QImode, dsth); 3545 } 3546 else 3547 dstl = dsth = const0_rtx; 3548 3549 meml = adjust_address (dst, QImode, ofs); 3550 memh = adjust_address (dst, QImode, ofs+1); 3551 3552 emit_move_insn (meml, dstl); 3553 emit_move_insn (memh, dsth); 3554 return; 3555 } 3556 3557 dstl = gen_reg_rtx (DImode); 3558 dsth = gen_reg_rtx (DImode); 3559 insl = gen_reg_rtx (DImode); 3560 insh = gen_reg_rtx (DImode); 3561 3562 dsta = XEXP (dst, 0); 3563 if (GET_CODE (dsta) == LO_SUM) 3564 dsta = force_reg (Pmode, dsta); 3565 3566 /* AND addresses cannot be in any alias set, since they may implicitly 3567 alias surrounding code. Ideally we'd have some alias set that 3568 covered all types except those with alignment 8 or higher. 
*/ 3569 3570 meml = change_address (dst, DImode, 3571 gen_rtx_AND (DImode, 3572 plus_constant (DImode, dsta, ofs), 3573 GEN_INT (-8))); 3574 set_mem_alias_set (meml, 0); 3575 3576 memh = change_address (dst, DImode, 3577 gen_rtx_AND (DImode, 3578 plus_constant (DImode, dsta, 3579 ofs + size - 1), 3580 GEN_INT (-8))); 3581 set_mem_alias_set (memh, 0); 3582 3583 emit_move_insn (dsth, memh); 3584 emit_move_insn (dstl, meml); 3585 3586 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); 3587 3588 if (src != CONST0_RTX (GET_MODE (src))) 3589 { 3590 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), 3591 GEN_INT (size*8), addr)); 3592 3593 switch ((int) size) 3594 { 3595 case 2: 3596 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); 3597 break; 3598 case 4: 3599 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); 3600 break; 3601 case 8: 3602 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); 3603 break; 3604 default: 3605 gcc_unreachable (); 3606 } 3607 } 3608 3609 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr)); 3610 3611 switch ((int) size) 3612 { 3613 case 2: 3614 emit_insn (gen_mskwl (dstl, dstl, addr)); 3615 break; 3616 case 4: 3617 emit_insn (gen_mskll (dstl, dstl, addr)); 3618 break; 3619 case 8: 3620 emit_insn (gen_mskql (dstl, dstl, addr)); 3621 break; 3622 default: 3623 gcc_unreachable (); 3624 } 3625 3626 if (src != CONST0_RTX (GET_MODE (src))) 3627 { 3628 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); 3629 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); 3630 } 3631 3632 /* Must store high before low for degenerate case of aligned. */ 3633 emit_move_insn (memh, dsth); 3634 emit_move_insn (meml, dstl); 3635} 3636 3637/* The block move code tries to maximize speed by separating loads and 3638 stores at the expense of register pressure: we load all of the data 3639 before we store it back out. There are two secondary effects worth 3640 mentioning, that this speeds copying to/from aligned and unaligned 3641 buffers, and that it makes the code significantly easier to write. */ 3642 3643#define MAX_MOVE_WORDS 8 3644 3645/* Load an integral number of consecutive unaligned quadwords. */ 3646 3647static void 3648alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem, 3649 HOST_WIDE_INT words, HOST_WIDE_INT ofs) 3650{ 3651 rtx const im8 = GEN_INT (-8); 3652 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1]; 3653 rtx sreg, areg, tmp, smema; 3654 HOST_WIDE_INT i; 3655 3656 smema = XEXP (smem, 0); 3657 if (GET_CODE (smema) == LO_SUM) 3658 smema = force_reg (Pmode, smema); 3659 3660 /* Generate all the tmp registers we need. */ 3661 for (i = 0; i < words; ++i) 3662 { 3663 data_regs[i] = out_regs[i]; 3664 ext_tmps[i] = gen_reg_rtx (DImode); 3665 } 3666 data_regs[words] = gen_reg_rtx (DImode); 3667 3668 if (ofs != 0) 3669 smem = adjust_address (smem, GET_MODE (smem), ofs); 3670 3671 /* Load up all of the source data. */ 3672 for (i = 0; i < words; ++i) 3673 { 3674 tmp = change_address (smem, DImode, 3675 gen_rtx_AND (DImode, 3676 plus_constant (DImode, smema, 8*i), 3677 im8)); 3678 set_mem_alias_set (tmp, 0); 3679 emit_move_insn (data_regs[i], tmp); 3680 } 3681 3682 tmp = change_address (smem, DImode, 3683 gen_rtx_AND (DImode, 3684 plus_constant (DImode, smema, 3685 8*words - 1), 3686 im8)); 3687 set_mem_alias_set (tmp, 0); 3688 emit_move_insn (data_regs[words], tmp); 3689 3690 /* Extract the half-word fragments. 
Unfortunately DEC decided to make 3691 extxh with offset zero a noop instead of zeroing the register, so 3692 we must take care of that edge condition ourselves with cmov. */ 3693 3694 sreg = copy_addr_to_reg (smema); 3695 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, 3696 1, OPTAB_WIDEN); 3697 for (i = 0; i < words; ++i) 3698 { 3699 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); 3700 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg)); 3701 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i], 3702 gen_rtx_IF_THEN_ELSE (DImode, 3703 gen_rtx_EQ (DImode, areg, 3704 const0_rtx), 3705 const0_rtx, ext_tmps[i]))); 3706 } 3707 3708 /* Merge the half-words into whole words. */ 3709 for (i = 0; i < words; ++i) 3710 { 3711 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], 3712 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN); 3713 } 3714} 3715 3716/* Store an integral number of consecutive unaligned quadwords. DATA_REGS 3717 may be NULL to store zeros. */ 3718 3719static void 3720alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, 3721 HOST_WIDE_INT words, HOST_WIDE_INT ofs) 3722{ 3723 rtx const im8 = GEN_INT (-8); 3724 rtx ins_tmps[MAX_MOVE_WORDS]; 3725 rtx st_tmp_1, st_tmp_2, dreg; 3726 rtx st_addr_1, st_addr_2, dmema; 3727 HOST_WIDE_INT i; 3728 3729 dmema = XEXP (dmem, 0); 3730 if (GET_CODE (dmema) == LO_SUM) 3731 dmema = force_reg (Pmode, dmema); 3732 3733 /* Generate all the tmp registers we need. */ 3734 if (data_regs != NULL) 3735 for (i = 0; i < words; ++i) 3736 ins_tmps[i] = gen_reg_rtx(DImode); 3737 st_tmp_1 = gen_reg_rtx(DImode); 3738 st_tmp_2 = gen_reg_rtx(DImode); 3739 3740 if (ofs != 0) 3741 dmem = adjust_address (dmem, GET_MODE (dmem), ofs); 3742 3743 st_addr_2 = change_address (dmem, DImode, 3744 gen_rtx_AND (DImode, 3745 plus_constant (DImode, dmema, 3746 words*8 - 1), 3747 im8)); 3748 set_mem_alias_set (st_addr_2, 0); 3749 3750 st_addr_1 = change_address (dmem, DImode, 3751 gen_rtx_AND (DImode, dmema, im8)); 3752 set_mem_alias_set (st_addr_1, 0); 3753 3754 /* Load up the destination end bits. */ 3755 emit_move_insn (st_tmp_2, st_addr_2); 3756 emit_move_insn (st_tmp_1, st_addr_1); 3757 3758 /* Shift the input data into place. */ 3759 dreg = copy_addr_to_reg (dmema); 3760 if (data_regs != NULL) 3761 { 3762 for (i = words-1; i >= 0; --i) 3763 { 3764 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); 3765 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); 3766 } 3767 for (i = words-1; i > 0; --i) 3768 { 3769 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i], 3770 ins_tmps[i-1], ins_tmps[i-1], 1, 3771 OPTAB_WIDEN); 3772 } 3773 } 3774 3775 /* Split and merge the ends with the destination data. */ 3776 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); 3777 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); 3778 3779 if (data_regs != NULL) 3780 { 3781 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1], 3782 st_tmp_2, 1, OPTAB_WIDEN); 3783 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], 3784 st_tmp_1, 1, OPTAB_WIDEN); 3785 } 3786 3787 /* Store it all. */ 3788 emit_move_insn (st_addr_2, st_tmp_2); 3789 for (i = words-1; i > 0; --i) 3790 { 3791 rtx tmp = change_address (dmem, DImode, 3792 gen_rtx_AND (DImode, 3793 plus_constant (DImode, 3794 dmema, i*8), 3795 im8)); 3796 set_mem_alias_set (tmp, 0); 3797 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx); 3798 } 3799 emit_move_insn (st_addr_1, st_tmp_1); 3800} 3801 3802 3803/* Expand string/block move operations. 
3804 3805 operands[0] is the pointer to the destination. 3806 operands[1] is the pointer to the source. 3807 operands[2] is the number of bytes to move. 3808 operands[3] is the alignment. */ 3809 3810int 3811alpha_expand_block_move (rtx operands[]) 3812{ 3813 rtx bytes_rtx = operands[2]; 3814 rtx align_rtx = operands[3]; 3815 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); 3816 HOST_WIDE_INT bytes = orig_bytes; 3817 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; 3818 HOST_WIDE_INT dst_align = src_align; 3819 rtx orig_src = operands[1]; 3820 rtx orig_dst = operands[0]; 3821 rtx data_regs[2 * MAX_MOVE_WORDS + 16]; 3822 rtx tmp; 3823 unsigned int i, words, ofs, nregs = 0; 3824 3825 if (orig_bytes <= 0) 3826 return 1; 3827 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) 3828 return 0; 3829 3830 /* Look for additional alignment information from recorded register info. */ 3831 3832 tmp = XEXP (orig_src, 0); 3833 if (REG_P (tmp)) 3834 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3835 else if (GET_CODE (tmp) == PLUS 3836 && REG_P (XEXP (tmp, 0)) 3837 && CONST_INT_P (XEXP (tmp, 1))) 3838 { 3839 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3840 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3841 3842 if (a > src_align) 3843 { 3844 if (a >= 64 && c % 8 == 0) 3845 src_align = 64; 3846 else if (a >= 32 && c % 4 == 0) 3847 src_align = 32; 3848 else if (a >= 16 && c % 2 == 0) 3849 src_align = 16; 3850 } 3851 } 3852 3853 tmp = XEXP (orig_dst, 0); 3854 if (REG_P (tmp)) 3855 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3856 else if (GET_CODE (tmp) == PLUS 3857 && REG_P (XEXP (tmp, 0)) 3858 && CONST_INT_P (XEXP (tmp, 1))) 3859 { 3860 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3861 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3862 3863 if (a > dst_align) 3864 { 3865 if (a >= 64 && c % 8 == 0) 3866 dst_align = 64; 3867 else if (a >= 32 && c % 4 == 0) 3868 dst_align = 32; 3869 else if (a >= 16 && c % 2 == 0) 3870 dst_align = 16; 3871 } 3872 } 3873 3874 ofs = 0; 3875 if (src_align >= 64 && bytes >= 8) 3876 { 3877 words = bytes / 8; 3878 3879 for (i = 0; i < words; ++i) 3880 data_regs[nregs + i] = gen_reg_rtx (DImode); 3881 3882 for (i = 0; i < words; ++i) 3883 emit_move_insn (data_regs[nregs + i], 3884 adjust_address (orig_src, DImode, ofs + i * 8)); 3885 3886 nregs += words; 3887 bytes -= words * 8; 3888 ofs += words * 8; 3889 } 3890 3891 if (src_align >= 32 && bytes >= 4) 3892 { 3893 words = bytes / 4; 3894 3895 for (i = 0; i < words; ++i) 3896 data_regs[nregs + i] = gen_reg_rtx (SImode); 3897 3898 for (i = 0; i < words; ++i) 3899 emit_move_insn (data_regs[nregs + i], 3900 adjust_address (orig_src, SImode, ofs + i * 4)); 3901 3902 nregs += words; 3903 bytes -= words * 4; 3904 ofs += words * 4; 3905 } 3906 3907 if (bytes >= 8) 3908 { 3909 words = bytes / 8; 3910 3911 for (i = 0; i < words+1; ++i) 3912 data_regs[nregs + i] = gen_reg_rtx (DImode); 3913 3914 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src, 3915 words, ofs); 3916 3917 nregs += words; 3918 bytes -= words * 8; 3919 ofs += words * 8; 3920 } 3921 3922 if (! 
TARGET_BWX && bytes >= 4) 3923 { 3924 data_regs[nregs++] = tmp = gen_reg_rtx (SImode); 3925 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); 3926 bytes -= 4; 3927 ofs += 4; 3928 } 3929 3930 if (bytes >= 2) 3931 { 3932 if (src_align >= 16) 3933 { 3934 do { 3935 data_regs[nregs++] = tmp = gen_reg_rtx (HImode); 3936 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); 3937 bytes -= 2; 3938 ofs += 2; 3939 } while (bytes >= 2); 3940 } 3941 else if (! TARGET_BWX) 3942 { 3943 data_regs[nregs++] = tmp = gen_reg_rtx (HImode); 3944 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); 3945 bytes -= 2; 3946 ofs += 2; 3947 } 3948 } 3949 3950 while (bytes > 0) 3951 { 3952 data_regs[nregs++] = tmp = gen_reg_rtx (QImode); 3953 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); 3954 bytes -= 1; 3955 ofs += 1; 3956 } 3957 3958 gcc_assert (nregs <= ARRAY_SIZE (data_regs)); 3959 3960 /* Now save it back out again. */ 3961 3962 i = 0, ofs = 0; 3963 3964 /* Write out the data in whatever chunks reading the source allowed. */ 3965 if (dst_align >= 64) 3966 { 3967 while (i < nregs && GET_MODE (data_regs[i]) == DImode) 3968 { 3969 emit_move_insn (adjust_address (orig_dst, DImode, ofs), 3970 data_regs[i]); 3971 ofs += 8; 3972 i++; 3973 } 3974 } 3975 3976 if (dst_align >= 32) 3977 { 3978 /* If the source has remaining DImode regs, write them out in 3979 two pieces. */ 3980 while (i < nregs && GET_MODE (data_regs[i]) == DImode) 3981 { 3982 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), 3983 NULL_RTX, 1, OPTAB_WIDEN); 3984 3985 emit_move_insn (adjust_address (orig_dst, SImode, ofs), 3986 gen_lowpart (SImode, data_regs[i])); 3987 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), 3988 gen_lowpart (SImode, tmp)); 3989 ofs += 8; 3990 i++; 3991 } 3992 3993 while (i < nregs && GET_MODE (data_regs[i]) == SImode) 3994 { 3995 emit_move_insn (adjust_address (orig_dst, SImode, ofs), 3996 data_regs[i]); 3997 ofs += 4; 3998 i++; 3999 } 4000 } 4001 4002 if (i < nregs && GET_MODE (data_regs[i]) == DImode) 4003 { 4004 /* Write out a remaining block of words using unaligned methods. */ 4005 4006 for (words = 1; i + words < nregs; words++) 4007 if (GET_MODE (data_regs[i + words]) != DImode) 4008 break; 4009 4010 if (words == 1) 4011 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); 4012 else 4013 alpha_expand_unaligned_store_words (data_regs + i, orig_dst, 4014 words, ofs); 4015 4016 i += words; 4017 ofs += words * 8; 4018 } 4019 4020 /* Due to the above, this won't be aligned. */ 4021 /* ??? If we have more than one of these, consider constructing full 4022 words in registers and using alpha_expand_unaligned_store_words. */ 4023 while (i < nregs && GET_MODE (data_regs[i]) == SImode) 4024 { 4025 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); 4026 ofs += 4; 4027 i++; 4028 } 4029 4030 if (dst_align >= 16) 4031 while (i < nregs && GET_MODE (data_regs[i]) == HImode) 4032 { 4033 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); 4034 i++; 4035 ofs += 2; 4036 } 4037 else 4038 while (i < nregs && GET_MODE (data_regs[i]) == HImode) 4039 { 4040 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); 4041 i++; 4042 ofs += 2; 4043 } 4044 4045 /* The remainder must be byte copies. 
int
alpha_expand_block_clear (rtx operands[])
{
  rtx bytes_rtx = operands[1];
  rtx align_rtx = operands[3];
  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
  HOST_WIDE_INT bytes = orig_bytes;
  HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
  HOST_WIDE_INT alignofs = 0;
  rtx orig_dst = operands[0];
  rtx tmp;
  int i, words, ofs = 0;

  if (orig_bytes <= 0)
    return 1;
  if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
    return 0;

  /* Look for stricter alignment.  */
  tmp = XEXP (orig_dst, 0);
  if (REG_P (tmp))
    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
  else if (GET_CODE (tmp) == PLUS
	   && REG_P (XEXP (tmp, 0))
	   && CONST_INT_P (XEXP (tmp, 1)))
    {
      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));

      if (a > align)
	{
	  if (a >= 64)
	    align = a, alignofs = 8 - c % 8;
	  else if (a >= 32)
	    align = a, alignofs = 4 - c % 4;
	  else if (a >= 16)
	    align = a, alignofs = 2 - c % 2;
	}
    }

  /* Handle an unaligned prefix first.  */

  if (alignofs > 0)
    {
#if HOST_BITS_PER_WIDE_INT >= 64
      /* Given that alignofs is bounded by align, the only time BWX could
	 generate three stores is for a 7 byte fill.  Prefer two individual
	 stores over a load/mask/store sequence.  */
      if ((!TARGET_BWX || alignofs == 7)
	  && align >= 32
	  && !(alignofs == 4 && bytes >= 4))
	{
	  machine_mode mode = (align >= 64 ? DImode : SImode);
	  int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
	  rtx mem, tmp;
	  HOST_WIDE_INT mask;

	  mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
	  set_mem_alias_set (mem, 0);

	  mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
	  if (bytes < alignofs)
	    {
	      mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
	      ofs += bytes;
	      bytes = 0;
	    }
	  else
	    {
	      bytes -= alignofs;
	      ofs += alignofs;
	    }
	  alignofs = 0;

	  tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
			      NULL_RTX, 1, OPTAB_WIDEN);

	  emit_move_insn (mem, tmp);
	}
#endif

      if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
	{
	  emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
	  bytes -= 1;
	  ofs += 1;
	  alignofs -= 1;
	}
      if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
	{
	  emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
	  bytes -= 2;
	  ofs += 2;
	  alignofs -= 2;
	}
      if (alignofs == 4 && bytes >= 4)
	{
	  emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
	  bytes -= 4;
	  ofs += 4;
	  alignofs = 0;
	}

      /* If we've not used the extra lead alignment information by now,
	 we won't be able to.  Downgrade align to match what's left over.  */
      if (alignofs > 0)
	{
	  alignofs = alignofs & -alignofs;
	  align = MIN (align, alignofs * BITS_PER_UNIT);
	}
    }
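
  /* Worked example for the load/mask/store prefix above (illustrative):
     with align == 64 and alignofs == 3, inv_alignofs == 5 and the mask
     is ~(~0 << 40) == 0x000000ffffffffff, so the and/store sequence
     preserves the low five bytes of the containing quadword and clears
     the three prefix bytes.  */
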
  /* Handle a block of contiguous long-words.  */

  if (align >= 64 && bytes >= 8)
    {
      words = bytes / 8;

      for (i = 0; i < words; ++i)
	emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
			const0_rtx);

      bytes -= words * 8;
      ofs += words * 8;
    }

  /* If the block is large and appropriately aligned, emit a single
     store followed by a sequence of stq_u insns.  */

  if (align >= 32 && bytes > 16)
    {
      rtx orig_dsta;

      emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
      bytes -= 4;
      ofs += 4;

      orig_dsta = XEXP (orig_dst, 0);
      if (GET_CODE (orig_dsta) == LO_SUM)
	orig_dsta = force_reg (Pmode, orig_dsta);

      words = bytes / 8;
      for (i = 0; i < words; ++i)
	{
	  rtx mem
	    = change_address (orig_dst, DImode,
			      gen_rtx_AND (DImode,
					   plus_constant (DImode, orig_dsta,
							  ofs + i*8),
					   GEN_INT (-8)));
	  set_mem_alias_set (mem, 0);
	  emit_move_insn (mem, const0_rtx);
	}

      /* Depending on the alignment, the first stq_u may have overlapped
	 with the initial stl, which means that the last stq_u didn't
	 write as much as it would appear.  Leave those questionable bytes
	 unaccounted for.  */
      bytes -= words * 8 - 4;
      ofs += words * 8 - 4;
    }
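
  /* Illustrative example of the accounting above: if the destination
     is 4 mod 8 and 20 bytes remain, the stl clears bytes [0,4) and
     words == 2, so the stq_u insns clear the quadwords [0,8) and
     [8,16) -- the first overlapping the stl -- leaving bytes [16,20)
     for the trailing code below, which matches bytes -= 2*8 - 4.  */
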
  /* Handle a smaller block of aligned words.  */

  if ((align >= 64 && bytes == 4)
      || (align == 32 && bytes >= 4))
    {
      words = bytes / 4;

      for (i = 0; i < words; ++i)
	emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
			const0_rtx);

      bytes -= words * 4;
      ofs += words * 4;
    }

  /* An unaligned block uses stq_u stores for as many as possible.  */

  if (bytes >= 8)
    {
      words = bytes / 8;

      alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);

      bytes -= words * 8;
      ofs += words * 8;
    }

  /* Next clean up any trailing pieces.  */

#if HOST_BITS_PER_WIDE_INT >= 64
  /* Count the number of bits in BYTES for which aligned stores could
     be emitted.  */
  words = 0;
  for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
    if (bytes & i)
      words += 1;

  /* If we have appropriate alignment (and it wouldn't take too many
     instructions otherwise), mask out the bytes we need.  */
  if (TARGET_BWX ? words > 2 : bytes > 0)
    {
      if (align >= 64)
	{
	  rtx mem, tmp;
	  HOST_WIDE_INT mask;

	  mem = adjust_address (orig_dst, DImode, ofs);
	  set_mem_alias_set (mem, 0);

	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);

	  tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
			      NULL_RTX, 1, OPTAB_WIDEN);

	  emit_move_insn (mem, tmp);
	  return 1;
	}
      else if (align >= 32 && bytes < 4)
	{
	  rtx mem, tmp;
	  HOST_WIDE_INT mask;

	  mem = adjust_address (orig_dst, SImode, ofs);
	  set_mem_alias_set (mem, 0);

	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);

	  tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
			      NULL_RTX, 1, OPTAB_WIDEN);

	  emit_move_insn (mem, tmp);
	  return 1;
	}
    }
#endif

  if (!TARGET_BWX && bytes >= 4)
    {
      alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
      bytes -= 4;
      ofs += 4;
    }

  if (bytes >= 2)
    {
      if (align >= 16)
	{
	  do {
	    emit_move_insn (adjust_address (orig_dst, HImode, ofs),
			    const0_rtx);
	    bytes -= 2;
	    ofs += 2;
	  } while (bytes >= 2);
	}
      else if (! TARGET_BWX)
	{
	  alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
	  bytes -= 2;
	  ofs += 2;
	}
    }

  while (bytes > 0)
    {
      emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
      bytes -= 1;
      ofs += 1;
    }

  return 1;
}

/* Returns a mask so that zap(x, value) == x & mask.  */

rtx
alpha_expand_zap_mask (HOST_WIDE_INT value)
{
  rtx result;
  int i;

  if (HOST_BITS_PER_WIDE_INT >= 64)
    {
      HOST_WIDE_INT mask = 0;

      for (i = 7; i >= 0; --i)
	{
	  mask <<= 8;
	  if (!((value >> i) & 1))
	    mask |= 0xff;
	}

      result = gen_int_mode (mask, DImode);
    }
  else
    {
      HOST_WIDE_INT mask_lo = 0, mask_hi = 0;

      gcc_assert (HOST_BITS_PER_WIDE_INT == 32);

      for (i = 7; i >= 4; --i)
	{
	  mask_hi <<= 8;
	  if (!((value >> i) & 1))
	    mask_hi |= 0xff;
	}

      for (i = 3; i >= 0; --i)
	{
	  mask_lo <<= 8;
	  if (!((value >> i) & 1))
	    mask_lo |= 0xff;
	}

      result = immed_double_const (mask_lo, mask_hi, DImode);
    }

  return result;
}
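
/* For example (illustrative): alpha_expand_zap_mask (0x0f) yields
   0xffffffff00000000, since bits 0-3 of VALUE select the low four
   bytes for zapping, so those bytes of the mask are zero and the
   remaining bytes stay 0xff.  */
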
void
alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
				   machine_mode mode,
				   rtx op0, rtx op1, rtx op2)
{
  op0 = gen_lowpart (mode, op0);

  if (op1 == const0_rtx)
    op1 = CONST0_RTX (mode);
  else
    op1 = gen_lowpart (mode, op1);

  if (op2 == const0_rtx)
    op2 = CONST0_RTX (mode);
  else
    op2 = gen_lowpart (mode, op2);

  emit_insn ((*gen) (op0, op1, op2));
}

/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  rtx x;

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
  add_int_reg_note (x, REG_BR_PROB, very_unlikely);
}

/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  rtx (*fn) (rtx, rtx) = NULL;
  if (mode == SImode)
    fn = gen_load_locked_si;
  else if (mode == DImode)
    fn = gen_load_locked_di;
  emit_insn (fn (reg, mem));
}

/* A subroutine of the atomic operation splitters.  Emit a
   store-conditional instruction in MODE.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  rtx (*fn) (rtx, rtx, rtx) = NULL;
  if (mode == SImode)
    fn = gen_store_conditional_si;
  else if (mode == DImode)
    fn = gen_store_conditional_di;
  emit_insn (fn (res, mem, val));
}

/* Subroutines of the atomic operation splitters.  Emit barriers
   as needed for the memory MODEL.  */

static void
alpha_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
alpha_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* A subroutine of the atomic operation splitters.  Emit an insxl
   instruction in MODE.  */

static rtx
emit_insxl (machine_mode mode, rtx op1, rtx op2)
{
  rtx ret = gen_reg_rtx (DImode);
  rtx (*fn) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode:
      fn = gen_insbl;
      break;
    case HImode:
      fn = gen_inswl;
      break;
    case SImode:
      fn = gen_insll;
      break;
    case DImode:
      fn = gen_insql;
      break;
    default:
      gcc_unreachable ();
    }

  op1 = force_reg (mode, op1);
  emit_insn (fn (ret, op1, op2));

  return ret;
}

/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations
   to return the value of MEM either before or after the operation.  SCRATCH
   is a scratch register.  */

void
alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
		       rtx after, rtx scratch, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));

  alpha_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (DImode, label);

  if (before == NULL)
    before = scratch;
  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = gen_rtx_AND (mode, before, val);
      emit_insn (gen_rtx_SET (VOIDmode, val, x));

      x = gen_rtx_NOT (mode, val);
    }
  else
    x = gen_rtx_fmt_ee (code, mode, before, val);
  if (after)
    emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

  emit_store_conditional (mode, cond, mem, scratch);

  x = gen_rtx_EQ (DImode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  alpha_post_atomic_barrier (model);
}
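
/* For CODE == PLUS on a DImode location, the splitter above produces
   roughly the following assembly (an illustrative sketch, modulo
   register allocation and the memory-model barriers):

	mb				# pre barrier, if MODEL needs one
     1:	ldq_l	$scratch,0($mem)
	addq	$scratch,$val,$scratch
	stq_c	$scratch,0($mem)
	beq	$scratch,1b		# branch marked very unlikely
	mb				# post barrier, if MODEL needs one
*/
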
/* Expand a compare and swap operation.  */

void
alpha_split_compare_and_swap (rtx operands[])
{
  rtx cond, retval, mem, oldval, newval;
  bool is_weak;
  enum memmodel mod_s, mod_f;
  machine_mode mode;
  rtx label1, label2, x;

  cond = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  mode = GET_MODE (mem);

  alpha_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = gen_lowpart (DImode, retval);
  if (oldval == const0_rtx)
    {
      emit_move_insn (cond, const0_rtx);
      x = gen_rtx_NE (DImode, x, const0_rtx);
    }
  else
    {
      x = gen_rtx_EQ (DImode, x, oldval);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
      x = gen_rtx_EQ (DImode, cond, const0_rtx);
    }
  emit_unlikely_jump (x, label2);

  emit_move_insn (cond, newval);
  emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));

  if (!is_weak)
    {
      x = gen_rtx_EQ (DImode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  alpha_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));
}
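
/* An illustrative sketch of the strong (is_weak == false) DImode
   sequence emitted above, for OLDVAL != 0:

     1:	ldq_l	$retval,0($mem)
	cmpeq	$retval,$oldval,$cond
	beq	$cond,2f		# compare failed; unlikely
	mov	$newval,$cond
	stq_c	$cond,0($mem)
	beq	$cond,1b		# lost the reservation; retry
     2:
*/
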
void
alpha_expand_compare_and_swap_12 (rtx operands[])
{
  rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
  machine_mode mode;
  rtx addr, align, wdst;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  cond = operands[0];
  dst = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* We forced the address into a register via mem_noofs_operand.  */
  addr = XEXP (mem, 0);
  gcc_assert (register_operand (addr, DImode));

  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
			       NULL_RTX, 1, OPTAB_DIRECT);

  oldval = convert_modes (DImode, mode, oldval, 1);

  if (newval != const0_rtx)
    newval = emit_insxl (mode, newval, addr);

  wdst = gen_reg_rtx (DImode);
  if (mode == QImode)
    gen = gen_atomic_compare_and_swapqi_1;
  else
    gen = gen_atomic_compare_and_swaphi_1;
  emit_insn (gen (cond, wdst, mem, oldval, newval, align,
		  is_weak, mod_s, mod_f));

  emit_move_insn (dst, gen_lowpart (mode, wdst));
}

void
alpha_split_compare_and_swap_12 (rtx operands[])
{
  rtx cond, dest, orig_mem, oldval, newval, align, scratch;
  machine_mode mode;
  bool is_weak;
  enum memmodel mod_s, mod_f;
  rtx label1, label2, mem, addr, width, mask, x;

  cond = operands[0];
  dest = operands[1];
  orig_mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  align = operands[5];
  is_weak = (operands[6] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[7]));
  mod_f = memmodel_from_int (INTVAL (operands[8]));
  scratch = operands[9];
  mode = GET_MODE (orig_mem);
  addr = XEXP (orig_mem, 0);

  mem = gen_rtx_MEM (DImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  alpha_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());

  emit_load_locked (DImode, scratch, mem);

  width = GEN_INT (GET_MODE_BITSIZE (mode));
  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
  emit_insn (gen_extxl (dest, scratch, width, addr));

  if (oldval == const0_rtx)
    {
      emit_move_insn (cond, const0_rtx);
      x = gen_rtx_NE (DImode, dest, const0_rtx);
    }
  else
    {
      x = gen_rtx_EQ (DImode, dest, oldval);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
      x = gen_rtx_EQ (DImode, cond, const0_rtx);
    }
  emit_unlikely_jump (x, label2);

  emit_insn (gen_mskxl (cond, scratch, mask, addr));

  if (newval != const0_rtx)
    emit_insn (gen_iordi3 (cond, cond, newval));

  emit_store_conditional (DImode, cond, mem, cond);

  if (!is_weak)
    {
      x = gen_rtx_EQ (DImode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  alpha_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));
}
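
/* In the QImode/HImode variant above, all memory traffic is on the
   containing aligned quadword: ldq_l loads it, extbl/extwl (via
   gen_extxl) extracts the old narrow value for the comparison, and on
   success mskbl/mskwl plus an or (gen_mskxl, gen_iordi3) splice the
   new value into the word that stq_c writes back.  This is a summary
   of the sequence emitted above, not additional code.  */
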
/* Expand an atomic exchange operation.  */

void
alpha_split_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, scratch;
  enum memmodel model;
  machine_mode mode;
  rtx label, x, cond;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = (enum memmodel) INTVAL (operands[3]);
  scratch = operands[4];
  mode = GET_MODE (mem);
  cond = gen_lowpart (DImode, scratch);

  alpha_pre_atomic_barrier (model);

  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);
  emit_move_insn (scratch, val);
  emit_store_conditional (mode, cond, mem, scratch);

  x = gen_rtx_EQ (DImode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  alpha_post_atomic_barrier (model);
}

void
alpha_expand_atomic_exchange_12 (rtx operands[])
{
  rtx dst, mem, val, model;
  machine_mode mode;
  rtx addr, align, wdst;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  dst = operands[0];
  mem = operands[1];
  val = operands[2];
  model = operands[3];
  mode = GET_MODE (mem);

  /* We forced the address into a register via mem_noofs_operand.  */
  addr = XEXP (mem, 0);
  gcc_assert (register_operand (addr, DImode));

  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
			       NULL_RTX, 1, OPTAB_DIRECT);

  /* Insert val into the correct byte location within the word.  */
  if (val != const0_rtx)
    val = emit_insxl (mode, val, addr);

  wdst = gen_reg_rtx (DImode);
  if (mode == QImode)
    gen = gen_atomic_exchangeqi_1;
  else
    gen = gen_atomic_exchangehi_1;
  emit_insn (gen (wdst, mem, val, align, model));

  emit_move_insn (dst, gen_lowpart (mode, wdst));
}

void
alpha_split_atomic_exchange_12 (rtx operands[])
{
  rtx dest, orig_mem, addr, val, align, scratch;
  rtx label, mem, width, mask, x;
  machine_mode mode;
  enum memmodel model;

  dest = operands[0];
  orig_mem = operands[1];
  val = operands[2];
  align = operands[3];
  model = (enum memmodel) INTVAL (operands[4]);
  scratch = operands[5];
  mode = GET_MODE (orig_mem);
  addr = XEXP (orig_mem, 0);

  mem = gen_rtx_MEM (DImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  alpha_pre_atomic_barrier (model);

  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (DImode, scratch, mem);

  width = GEN_INT (GET_MODE_BITSIZE (mode));
  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
  emit_insn (gen_extxl (dest, scratch, width, addr));
  emit_insn (gen_mskxl (scratch, scratch, mask, addr));
  if (val != const0_rtx)
    emit_insn (gen_iordi3 (scratch, scratch, val));

  emit_store_conditional (DImode, scratch, mem, scratch);

  x = gen_rtx_EQ (DImode, scratch, const0_rtx);
  emit_unlikely_jump (x, label);

  alpha_post_atomic_barrier (model);
}
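
/* Note on emit_insxl as used above: insbl/inswl shift OP1 into the
   byte lanes that the low three bits of OP2 (the original unaligned
   address) select within its aligned quadword, producing exactly the
   value that the 1- and 2-byte splitters or into the masked word.  */
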

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
  enum attr_type dep_insn_type;

  /* If the dependence is an anti-dependence, there is no cost.  For an
     output dependence, there is sometimes a cost, but it doesn't seem
     worth handling those few cases.  */
  if (REG_NOTE_KIND (link) != 0)
    return cost;

  /* If we can't recognize the insns, we can't really do anything.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  dep_insn_type = get_attr_type (dep_insn);

  /* Bring in the user-defined memory latency.  */
  if (dep_insn_type == TYPE_ILD
      || dep_insn_type == TYPE_FLD
      || dep_insn_type == TYPE_LDSYM)
    cost += alpha_memory_latency-1;

  /* Everything else handled in DFA bypasses now.  */

  return cost;
}
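
/* For example, with the default -mmemory-latency setting
   (alpha_memory_latency == 3), a dependency on an integer load
   (TYPE_ILD) costs 2 cycles more than the base cost computed by
   the DFA.  */
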
/* The number of instructions that can be issued per cycle.  */

static int
alpha_issue_rate (void)
{
  return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.

   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
   alternative schedules.  For EV5, we can choose between E0/E1 and
   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */

static int
alpha_multipass_dfa_lookahead (void)
{
  return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
}

/* Machine-specific function data.  */

struct GTY(()) alpha_links;

struct string_traits : default_hashmap_traits
{
  static bool equal_keys (const char *const &a, const char *const &b)
  {
    return strcmp (a, b) == 0;
  }
};

struct GTY(()) machine_function
{
  /* For flag_reorder_blocks_and_partition.  */
  rtx gp_save_rtx;

  /* For VMS condition handlers.  */
  bool uses_condition_handler;

  /* Linkage entries.  */
  hash_map<const char *, alpha_links *, string_traits> *links;
};

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
alpha_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Support for frame based VMS condition handlers.  */

/* A VMS condition handler may be established for a function with a call to
   __builtin_establish_vms_condition_handler, and cancelled with a call to
   __builtin_revert_vms_condition_handler.

   The VMS Condition Handling Facility knows about the existence of a handler
   from the procedure descriptor .handler field.  Like the VMS native
   compilers, we store the user specified handler's address at a fixed
   location in the stack frame and point the procedure descriptor at a common
   wrapper which fetches the real handler's address and issues an indirect
   call.

   The indirection wrapper is "__gcc_shell_handler", provided by libgcc.

   We force the procedure kind to PT_STACK, and the fixed frame location is
   fp+8, just before the register save area.  We use the handler_data field
   in the procedure descriptor to state the fp offset at which the installed
   handler address can be found.  */

#define VMS_COND_HANDLER_FP_OFFSET 8

/* Expand code to store the currently installed user VMS condition handler
   into TARGET and install HANDLER as the new condition handler.  */

void
alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
{
  rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
					    VMS_COND_HANDLER_FP_OFFSET);

  rtx handler_slot
    = gen_rtx_MEM (DImode, handler_slot_address);

  emit_move_insn (target, handler_slot);
  emit_move_insn (handler_slot, handler);

  /* Notify the start/prologue/epilogue emitters that the condition handler
     slot is needed.  In addition to reserving the slot space, this will
     force the procedure kind to PT_STACK, ensuring that the
     hard_frame_pointer_rtx use above is correct.  */
  cfun->machine->uses_condition_handler = true;
}

/* Expand code to store the current VMS condition handler into TARGET and
   nullify it.  */

void
alpha_expand_builtin_revert_vms_condition_handler (rtx target)
{
  /* We implement this by establishing a null condition handler, with the
     tiny side effect of setting uses_condition_handler.  This is a little
     bit pessimistic if no actual builtin_establish call is ever issued,
     which is not a real problem and expected never to happen anyway.  */

  alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
}
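
/* Sketch of the intended source-level usage of the two builtins above
   (illustrative; the handler type is abbreviated, and both builtins
   return the previously installed handler):

     void *prev = __builtin_establish_vms_condition_handler (handler);
     ...
     __builtin_revert_vms_condition_handler ();
*/
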
/* Functions to save and restore alpha_return_addr_rtx.  */

/* Start the ball rolling with RETURN_ADDR_RTX.  */

rtx
alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;

  return get_hard_reg_initial_val (Pmode, REG_RA);
}

/* Return or create a memory slot containing the gp value for the current
   function.  Needed only if TARGET_LD_BUGGY_LDGP.  */

rtx
alpha_gp_save_rtx (void)
{
  rtx_insn *seq;
  rtx m = cfun->machine->gp_save_rtx;

  if (m == NULL)
    {
      start_sequence ();

      m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
      m = validize_mem (m);
      emit_move_insn (m, pic_offset_table_rtx);

      seq = get_insns ();
      end_sequence ();

      /* We used to simply emit the sequence after entry_of_function.
	 However this breaks the CFG if the first instruction in the
	 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
	 label.  Emit the sequence properly on the edge.  We are only
	 invoked from dw2_build_landing_pads and finish_eh_generation
	 will call commit_edge_insertions thanks to a kludge.  */
      insert_insn_on_edge (seq,
			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));

      cfun->machine->gp_save_rtx = m;
    }

  return m;
}

static void
alpha_instantiate_decls (void)
{
  if (cfun->machine->gp_save_rtx != NULL_RTX)
    instantiate_decl_rtl (cfun->machine->gp_save_rtx);
}

static int
alpha_ra_ever_killed (void)
{
  rtx_insn *top;

  if (!has_hard_reg_initial_val (Pmode, REG_RA))
    return (int)df_regs_ever_live_p (REG_RA);

  push_topmost_sequence ();
  top = get_insns ();
  pop_topmost_sequence ();

  return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
}

/* Return the trap mode suffix applicable to the current
   instruction, or NULL.  */

static const char *
get_trap_mode_suffix (void)
{
  enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);

  switch (s)
    {
    case TRAP_SUFFIX_NONE:
      return NULL;

    case TRAP_SUFFIX_SU:
      if (alpha_fptm >= ALPHA_FPTM_SU)
	return "su";
      return NULL;

    case TRAP_SUFFIX_SUI:
      if (alpha_fptm >= ALPHA_FPTM_SUI)
	return "sui";
      return NULL;

    case TRAP_SUFFIX_V_SV:
      switch (alpha_fptm)
	{
	case ALPHA_FPTM_N:
	  return NULL;
	case ALPHA_FPTM_U:
	  return "v";
	case ALPHA_FPTM_SU:
	case ALPHA_FPTM_SUI:
	  return "sv";
	default:
	  gcc_unreachable ();
	}

    case TRAP_SUFFIX_V_SV_SVI:
      switch (alpha_fptm)
	{
	case ALPHA_FPTM_N:
	  return NULL;
	case ALPHA_FPTM_U:
	  return "v";
	case ALPHA_FPTM_SU:
	  return "sv";
	case ALPHA_FPTM_SUI:
	  return "svi";
	default:
	  gcc_unreachable ();
	}
      break;

    case TRAP_SUFFIX_U_SU_SUI:
      switch (alpha_fptm)
	{
	case ALPHA_FPTM_N:
	  return NULL;
	case ALPHA_FPTM_U:
	  return "u";
	case ALPHA_FPTM_SU:
	  return "su";
	case ALPHA_FPTM_SUI:
	  return "sui";
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
  gcc_unreachable ();
}

/* Return the rounding mode suffix applicable to the current
   instruction, or NULL.  */

static const char *
get_round_mode_suffix (void)
{
  enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);

  switch (s)
    {
    case ROUND_SUFFIX_NONE:
      return NULL;
    case ROUND_SUFFIX_NORMAL:
      switch (alpha_fprm)
	{
	case ALPHA_FPRM_NORM:
	  return NULL;
	case ALPHA_FPRM_MINF:
	  return "m";
	case ALPHA_FPRM_CHOP:
	  return "c";
	case ALPHA_FPRM_DYN:
	  return "d";
	default:
	  gcc_unreachable ();
	}
      break;

    case ROUND_SUFFIX_C:
      return "c";

    default:
      gcc_unreachable ();
    }
  gcc_unreachable ();
}
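
/* Taken together, the two helpers above produce qualifiers such as
   "/sud" -- e.g. "addt/sud" for an IEEE add compiled with
   -mfp-trap-mode=su and -mfp-rounding-mode=d (illustrative; the exact
   suffix tracks the alpha_fptm and alpha_fprm settings).  */
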
/* Print an operand.  Recognize special options, documented below.  */

void
print_operand (FILE *file, rtx x, int code)
{
  int i;

  switch (code)
    {
    case '~':
      /* Print the assembler name of the current function.  */
      assemble_name (file, alpha_fnname);
      break;

    case '&':
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      break;

    case '/':
      {
	const char *trap = get_trap_mode_suffix ();
	const char *round = get_round_mode_suffix ();

	if (trap || round)
	  fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
	break;
      }

    case ',':
      /* Generates single precision instruction suffix.  */
      fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
      break;

    case '-':
      /* Generates double precision instruction suffix.  */
      fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
      break;

    case '#':
      if (alpha_this_literal_sequence_number == 0)
	alpha_this_literal_sequence_number = alpha_next_sequence_number++;
      fprintf (file, "%d", alpha_this_literal_sequence_number);
      break;

    case '*':
      if (alpha_this_gpdisp_sequence_number == 0)
	alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
      fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
      break;

    case 'H':
      if (GET_CODE (x) == HIGH)
	output_addr_const (file, XEXP (x, 0));
      else
	output_operand_lossage ("invalid %%H value");
      break;

    case 'J':
      {
	const char *lituse;

	if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
	  {
	    x = XVECEXP (x, 0, 0);
	    lituse = "lituse_tlsgd";
	  }
	else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
	  {
	    x = XVECEXP (x, 0, 0);
	    lituse = "lituse_tlsldm";
	  }
	else if (CONST_INT_P (x))
	  lituse = "lituse_jsr";
	else
	  {
	    output_operand_lossage ("invalid %%J value");
	    break;
	  }

	if (x != const0_rtx)
	  fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
      }
      break;

    case 'j':
      {
	const char *lituse;

#ifdef HAVE_AS_JSRDIRECT_RELOCS
	lituse = "lituse_jsrdirect";
#else
	lituse = "lituse_jsr";
#endif

	gcc_assert (INTVAL (x) != 0);
	fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
      }
      break;

    case 'r':
      /* If this operand is the constant zero, write it as "$31".  */
      if (REG_P (x))
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fprintf (file, "$31");
      else
	output_operand_lossage ("invalid %%r value");
      break;

    case 'R':
      /* Similar, but for floating-point.  */
      if (REG_P (x))
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fprintf (file, "$f31");
      else
	output_operand_lossage ("invalid %%R value");
      break;

    case 'N':
      /* Write the 1's complement of a constant.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%N value");

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      break;

    case 'P':
      /* Write 1 << C, for a constant C.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%P value");

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
      break;

    case 'h':
      /* Write the high-order 16 bits of a constant, sign-extended.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%h value");

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
      break;

    case 'L':
      /* Write the low-order 16 bits of a constant, sign-extended.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%L value");

      fprintf (file, HOST_WIDE_INT_PRINT_DEC,
	       (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
      break;
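
      /* Worked example for %h and %L (illustrative): for the constant
	 0x12345678, %h prints 4660 (0x1234) and %L prints 22136
	 (0x5678); for 0x18000, %L prints -32768, matching the
	 sign-extension that lda applies to its 16-bit displacement.  */
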
    case 'm':
      /* Write mask for ZAP insn.  */
      if (GET_CODE (x) == CONST_DOUBLE)
	{
	  HOST_WIDE_INT mask = 0;
	  HOST_WIDE_INT value;

	  value = CONST_DOUBLE_LOW (x);
	  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
	       i++, value >>= 8)
	    if (value & 0xff)
	      mask |= (1 << i);

	  value = CONST_DOUBLE_HIGH (x);
	  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
	       i++, value >>= 8)
	    if (value & 0xff)
	      mask |= (1 << (i + sizeof (int)));

	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
	}
      else if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT mask = 0, value = INTVAL (x);

	  for (i = 0; i < 8; i++, value >>= 8)
	    if (value & 0xff)
	      mask |= (1 << i);

	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
	}
      else
	output_operand_lossage ("invalid %%m value");
      break;

    case 'M':
      /* 'b', 'w', 'l', or 'q' as the value of the constant.  */
      if (!CONST_INT_P (x)
	  || (INTVAL (x) != 8 && INTVAL (x) != 16
	      && INTVAL (x) != 32 && INTVAL (x) != 64))
	output_operand_lossage ("invalid %%M value");

      fprintf (file, "%s",
	       (INTVAL (x) == 8 ? "b"
		: INTVAL (x) == 16 ? "w"
		: INTVAL (x) == 32 ? "l"
		: "q"));
      break;

    case 'U':
      /* Similar, except do it from the mask.  */
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT value = INTVAL (x);

	  if (value == 0xff)
	    {
	      fputc ('b', file);
	      break;
	    }
	  if (value == 0xffff)
	    {
	      fputc ('w', file);
	      break;
	    }
	  if (value == 0xffffffff)
	    {
	      fputc ('l', file);
	      break;
	    }
	  if (value == -1)
	    {
	      fputc ('q', file);
	      break;
	    }
	}
      else if (HOST_BITS_PER_WIDE_INT == 32
	       && GET_CODE (x) == CONST_DOUBLE
	       && CONST_DOUBLE_LOW (x) == 0xffffffff
	       && CONST_DOUBLE_HIGH (x) == 0)
	{
	  fputc ('l', file);
	  break;
	}
      output_operand_lossage ("invalid %%U value");
      break;

    case 's':
      /* Write the constant value divided by 8.  */
      if (!CONST_INT_P (x)
	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
	  || (INTVAL (x) & 7) != 0)
	output_operand_lossage ("invalid %%s value");

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
      break;

    case 'S':
      /* Same, except compute (64 - c) / 8.  */

      if (!CONST_INT_P (x)
	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
	  || (INTVAL (x) & 7) != 0)
	output_operand_lossage ("invalid %%S value");

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
      break;
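
      /* Example for %s and %S (illustrative): for the bit count 32,
	 %s prints 32 / 8 == 4 and %S prints (64 - 32) / 8 == 4, the
	 byte-shift amounts used by the extract and insert patterns.  */
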
"l" : "q"); 5454 break; 5455 case MOD: 5456 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q"); 5457 break; 5458 case UMOD: 5459 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q"); 5460 break; 5461 default: 5462 output_operand_lossage ("invalid %%E value"); 5463 break; 5464 } 5465 break; 5466 5467 case 'A': 5468 /* Write "_u" for unaligned access. */ 5469 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) 5470 fprintf (file, "_u"); 5471 break; 5472 5473 case 0: 5474 if (REG_P (x)) 5475 fprintf (file, "%s", reg_names[REGNO (x)]); 5476 else if (MEM_P (x)) 5477 output_address (XEXP (x, 0)); 5478 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) 5479 { 5480 switch (XINT (XEXP (x, 0), 1)) 5481 { 5482 case UNSPEC_DTPREL: 5483 case UNSPEC_TPREL: 5484 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); 5485 break; 5486 default: 5487 output_operand_lossage ("unknown relocation unspec"); 5488 break; 5489 } 5490 } 5491 else 5492 output_addr_const (file, x); 5493 break; 5494 5495 default: 5496 output_operand_lossage ("invalid %%xn code"); 5497 } 5498} 5499 5500void 5501print_operand_address (FILE *file, rtx addr) 5502{ 5503 int basereg = 31; 5504 HOST_WIDE_INT offset = 0; 5505 5506 if (GET_CODE (addr) == AND) 5507 addr = XEXP (addr, 0); 5508 5509 if (GET_CODE (addr) == PLUS 5510 && CONST_INT_P (XEXP (addr, 1))) 5511 { 5512 offset = INTVAL (XEXP (addr, 1)); 5513 addr = XEXP (addr, 0); 5514 } 5515 5516 if (GET_CODE (addr) == LO_SUM) 5517 { 5518 const char *reloc16, *reloclo; 5519 rtx op1 = XEXP (addr, 1); 5520 5521 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) 5522 { 5523 op1 = XEXP (op1, 0); 5524 switch (XINT (op1, 1)) 5525 { 5526 case UNSPEC_DTPREL: 5527 reloc16 = NULL; 5528 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello"); 5529 break; 5530 case UNSPEC_TPREL: 5531 reloc16 = NULL; 5532 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello"); 5533 break; 5534 default: 5535 output_operand_lossage ("unknown relocation unspec"); 5536 return; 5537 } 5538 5539 output_addr_const (file, XVECEXP (op1, 0, 0)); 5540 } 5541 else 5542 { 5543 reloc16 = "gprel"; 5544 reloclo = "gprellow"; 5545 output_addr_const (file, op1); 5546 } 5547 5548 if (offset) 5549 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); 5550 5551 addr = XEXP (addr, 0); 5552 switch (GET_CODE (addr)) 5553 { 5554 case REG: 5555 basereg = REGNO (addr); 5556 break; 5557 5558 case SUBREG: 5559 basereg = subreg_regno (addr); 5560 break; 5561 5562 default: 5563 gcc_unreachable (); 5564 } 5565 5566 fprintf (file, "($%d)\t\t!%s", basereg, 5567 (basereg == 29 ? 
void
print_operand_address (FILE *file, rtx addr)
{
  int basereg = 31;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (addr) == AND)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS
      && CONST_INT_P (XEXP (addr, 1)))
    {
      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (GET_CODE (addr) == LO_SUM)
    {
      const char *reloc16, *reloclo;
      rtx op1 = XEXP (addr, 1);

      if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
	{
	  op1 = XEXP (op1, 0);
	  switch (XINT (op1, 1))
	    {
	    case UNSPEC_DTPREL:
	      reloc16 = NULL;
	      reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
	      break;
	    case UNSPEC_TPREL:
	      reloc16 = NULL;
	      reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
	      break;
	    default:
	      output_operand_lossage ("unknown relocation unspec");
	      return;
	    }

	  output_addr_const (file, XVECEXP (op1, 0, 0));
	}
      else
	{
	  reloc16 = "gprel";
	  reloclo = "gprellow";
	  output_addr_const (file, op1);
	}

      if (offset)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);

      addr = XEXP (addr, 0);
      switch (GET_CODE (addr))
	{
	case REG:
	  basereg = REGNO (addr);
	  break;

	case SUBREG:
	  basereg = subreg_regno (addr);
	  break;

	default:
	  gcc_unreachable ();
	}

      fprintf (file, "($%d)\t\t!%s", basereg,
	       (basereg == 29 ? reloc16 : reloclo));
      return;
    }

  switch (GET_CODE (addr))
    {
    case REG:
      basereg = REGNO (addr);
      break;

    case SUBREG:
      basereg = subreg_regno (addr);
      break;

    case CONST_INT:
      offset = INTVAL (addr);
      break;

    case SYMBOL_REF:
      gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
      fprintf (file, "%s", XSTR (addr, 0));
      return;

    case CONST:
      gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
      fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
	       XSTR (XEXP (XEXP (addr, 0), 0), 0),
	       INTVAL (XEXP (XEXP (addr, 0), 1)));
      return;

    default:
      output_operand_lossage ("invalid operand address");
      return;
    }

  fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
}

/* Emit RTL insns to initialize the variable parts of a trampoline at
   M_TRAMP.  FNDECL is target function's decl.  CHAIN_VALUE is an rtx
   for the static chain value for the function.  */

static void
alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, word1, word2;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

#ifdef POINTERS_EXTEND_UNSIGNED
  fnaddr = convert_memory_address (Pmode, fnaddr);
  chain_value = convert_memory_address (Pmode, chain_value);
#endif

  if (TARGET_ABI_OPEN_VMS)
    {
      const char *fnname;
      char *trname;

      /* Construct the name of the trampoline entry point.  */
      fnname = XSTR (fnaddr, 0);
      trname = (char *) alloca (strlen (fnname) + 5);
      strcpy (trname, fnname);
      strcat (trname, "..tr");
      fnname = ggc_alloc_string (trname, strlen (trname) + 1);
      word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);

      /* Trampoline (or "bounded") procedure descriptor is constructed from
	 the function's procedure descriptor with certain fields zeroed IAW
	 the VMS calling standard.  This is stored in the first quadword.  */
      word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
      word1 = expand_and (DImode, word1,
			  GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
			  NULL);
    }
  else
    {
      /* These 4 instructions are:
	    ldq $1,24($27)
	    ldq $27,16($27)
	    jmp $31,($27),0
	    nop
	 We don't bother setting the HINT field of the jump; the nop
	 is merely there for padding.  */
      word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
      word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
    }

  /* Store the first two words, as computed above.  */
  mem = adjust_address (m_tramp, DImode, 0);
  emit_move_insn (mem, word1);
  mem = adjust_address (m_tramp, DImode, 8);
  emit_move_insn (mem, word2);

  /* Store function address and static chain value.  */
  mem = adjust_address (m_tramp, Pmode, 16);
  emit_move_insn (mem, fnaddr);
  mem = adjust_address (m_tramp, Pmode, 24);
  emit_move_insn (mem, chain_value);

  if (TARGET_ABI_OSF)
    {
      emit_insn (gen_imb ());
#ifdef HAVE_ENABLE_EXECUTE_STACK
      emit_library_call (init_one_libfunc ("__enable_execute_stack"),
			 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
    }
}
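
/* The OSF trampoline built above therefore ends up with this layout
   (byte offsets on the left; a summary of the stores above):

      0:  ldq $1,24($27); ldq $27,16($27)
      8:  jmp $31,($27),0; nop
     16:  target function address
     24:  static chain value
*/
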

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On Alpha the first 6 words of args are normally in registers
   and the rest are pushed.  */

static rtx
alpha_function_arg (cumulative_args_t cum_v, machine_mode mode,
		    const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int basereg;
  int num_args;

  /* Don't get confused and pass small structures in FP registers.  */
  if (type && AGGREGATE_TYPE_P (type))
    basereg = 16;
  else
    {
#ifdef ENABLE_CHECKING
      /* With alpha_split_complex_arg, we shouldn't see any raw complex
	 values here.  */
      gcc_assert (!COMPLEX_MODE_P (mode));
#endif

      /* Set up defaults for FP operands passed in FP registers, and
	 integral operands passed in integer registers.  */
      if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
	basereg = 32 + 16;
      else
	basereg = 16;
    }

  /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
     the two platforms, so we can't avoid conditional compilation.  */
#if TARGET_ABI_OPEN_VMS
  {
    if (mode == VOIDmode)
      return alpha_arg_info_reg_val (*cum);

    num_args = cum->num_args;
    if (num_args >= 6
	|| targetm.calls.must_pass_in_stack (mode, type))
      return NULL_RTX;
  }
#elif TARGET_ABI_OSF
  {
    if (*cum >= 6)
      return NULL_RTX;
    num_args = *cum;

    /* VOID is passed as a special flag for "last argument".  */
    if (type == void_type_node)
      basereg = 16;
    else if (targetm.calls.must_pass_in_stack (mode, type))
      return NULL_RTX;
  }
#else
#error Unhandled ABI
#endif

  return gen_rtx_REG (mode, num_args + basereg);
}
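
/* Example (OSF ABI, illustrative): for "double f (int a, int b,
   double x)", the argument X arrives with *cum == 2 and DFmode, so
   basereg is 32 + 16 and X is passed in $f18, while A and B occupy
   $16 and $17.  */
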

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be
   available.)  */

static void
alpha_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			    const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  bool onstack = targetm.calls.must_pass_in_stack (mode, type);
  int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);

#if TARGET_ABI_OSF
  *cum += increment;
#else
  if (!onstack && cum->num_args < 6)
    cum->atypes[cum->num_args] = alpha_arg_type (mode);
  cum->num_args += increment;
#endif
}

static int
alpha_arg_partial_bytes (cumulative_args_t cum_v,
			 machine_mode mode ATTRIBUTE_UNUSED,
			 tree type ATTRIBUTE_UNUSED,
			 bool named ATTRIBUTE_UNUSED)
{
  int words = 0;
  CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);

#if TARGET_ABI_OPEN_VMS
  if (cum->num_args < 6
      && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
    words = 6 - cum->num_args;
#elif TARGET_ABI_OSF
  if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
    words = 6 - *cum;
#else
#error Unhandled ABI
#endif

  return words * UNITS_PER_WORD;
}
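
/* Example (illustrative): a 32-byte by-value structure whose first
   word would land in the fifth argument slot (*cum == 4 on OSF,
   ALPHA_ARG_SIZE == 4) straddles the boundary, so the function above
   reports 2 * UNITS_PER_WORD == 16 bytes passed in registers and the
   rest on the stack.  */
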

/* Return true if TYPE must be returned in memory, instead of in
   registers.  */

static bool
alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  machine_mode mode = VOIDmode;
  int size;

  if (type)
    {
      mode = TYPE_MODE (type);

      /* All aggregates are returned in memory, except on OpenVMS where
	 records that fit 64 bits should be returned by immediate value
	 as required by section 3.8.7.1 of the OpenVMS Calling Standard.  */
      if (TARGET_ABI_OPEN_VMS
	  && TREE_CODE (type) != ARRAY_TYPE
	  && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 8)
	return false;

      if (AGGREGATE_TYPE_P (type))
	return true;
    }

  size = GET_MODE_SIZE (mode);
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_VECTOR_FLOAT:
      /* Pass all float vectors in memory, like an aggregate.  */
      return true;

    case MODE_COMPLEX_FLOAT:
      /* We judge complex floats on the size of their element,
	 not the size of the whole type.  */
      size = GET_MODE_UNIT_SIZE (mode);
      break;

    case MODE_INT:
    case MODE_FLOAT:
    case MODE_COMPLEX_INT:
    case MODE_VECTOR_INT:
      break;

    default:
      /* ??? We get called on all sorts of random stuff from
	 aggregate_value_p.  We must return something, but it's not
	 clear what's safe to return.  Pretend it's a struct I
	 guess.  */
      return true;
    }

  /* Otherwise types must fit in one register.  */
  return size > UNITS_PER_WORD;
}

/* Return true if TYPE should be passed by invisible reference.  */

static bool
alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
			 machine_mode mode,
			 const_tree type ATTRIBUTE_UNUSED,
			 bool named ATTRIBUTE_UNUSED)
{
  return mode == TFmode || mode == TCmode;
}

/* Define how to find the value returned by a function.  VALTYPE is the
   data type of the value (as a tree).  If the precise function being
   called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
   MODE is set instead of VALTYPE for libcalls.

   On Alpha the value is found in $0 for integer functions and
   $f0 for floating-point functions.  */

rtx
function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
		machine_mode mode)
{
  unsigned int regnum, dummy ATTRIBUTE_UNUSED;
  enum mode_class mclass;

  gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));

  if (valtype)
    mode = TYPE_MODE (valtype);

  mclass = GET_MODE_CLASS (mode);
  switch (mclass)
    {
    case MODE_INT:
      /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
	 where we have them returning both SImode and DImode.  */
      if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
	PROMOTE_MODE (mode, dummy, valtype);
      /* FALLTHRU */

    case MODE_COMPLEX_INT:
    case MODE_VECTOR_INT:
      regnum = 0;
      break;

    case MODE_FLOAT:
      regnum = 32;
      break;

    case MODE_COMPLEX_FLOAT:
      {
	machine_mode cmode = GET_MODE_INNER (mode);

	return gen_rtx_PARALLEL
	  (VOIDmode,
	   gen_rtvec (2,
		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
					 const0_rtx),
		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
					 GEN_INT (GET_MODE_SIZE (cmode)))));
      }

    case MODE_RANDOM:
      /* We should only reach here for BLKmode on VMS.  */
      gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
      regnum = 0;
      break;

    default:
      gcc_unreachable ();
    }

  return gen_rtx_REG (mode, regnum);
}

/* TCmode complex values are passed by invisible reference.  We
   should not split these values.  */

static bool
alpha_split_complex_arg (const_tree type)
{
  return TYPE_MODE (type) != TCmode;
}

static tree
alpha_build_builtin_va_list (void)
{
  tree base, ofs, space, record, type_decl;

  if (TARGET_ABI_OPEN_VMS)
    return ptr_type_node;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"),
			  record);
  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;

  /* C++?  SET_IS_AGGR_TYPE (record, 1); */

  /* Dummy field to prevent alignment warnings.  */
  space = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, NULL_TREE, integer_type_node);
  DECL_FIELD_CONTEXT (space) = record;
  DECL_ARTIFICIAL (space) = 1;
  DECL_IGNORED_P (space) = 1;

  ofs = build_decl (BUILTINS_LOCATION,
		    FIELD_DECL, get_identifier ("__offset"),
		    integer_type_node);
  DECL_FIELD_CONTEXT (ofs) = record;
  DECL_CHAIN (ofs) = space;
  /* ??? This is a hack, __offset is marked volatile to prevent
     DCE that confuses stdarg optimization and results in
     gcc.c-torture/execute/stdarg-1.c failure.  See PR 41089.  */
  TREE_THIS_VOLATILE (ofs) = 1;

  base = build_decl (BUILTINS_LOCATION,
		     FIELD_DECL, get_identifier ("__base"),
		     ptr_type_node);
  DECL_FIELD_CONTEXT (base) = record;
  DECL_CHAIN (base) = ofs;

  TYPE_FIELDS (record) = base;
  layout_type (record);

  va_list_gpr_counter_field = ofs;
  return record;
}
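
/* The OSF record built above corresponds roughly to the following C
   declaration (illustrative):

     struct __va_list_tag
     {
       char *__base;    // start of the argument save area
       int __offset;    // argument bytes consumed so far
       int <unnamed>;   // dummy field, present only for alignment
     };
*/
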
#if TARGET_ABI_OSF
/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
   and constant additions.  */

static gimple
va_list_skip_additions (tree lhs)
{
  gimple stmt;

  for (;;)
    {
      enum tree_code code;

      stmt = SSA_NAME_DEF_STMT (lhs);

      if (gimple_code (stmt) == GIMPLE_PHI)
	return stmt;

      if (!is_gimple_assign (stmt)
	  || gimple_assign_lhs (stmt) != lhs)
	return NULL;

      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
	return stmt;
      code = gimple_assign_rhs_code (stmt);
      if (!CONVERT_EXPR_CODE_P (code)
	  && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
	      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
	      || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
	return stmt;

      lhs = gimple_assign_rhs1 (stmt);
    }
}

/* Check if LHS = RHS statement is
   LHS = *(ap.__base + ap.__offset + cst)
   or
   LHS = *(ap.__base
	   + ((ap.__offset + cst <= 47)
	      ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
   If the former, indicate that GPR registers are needed,
   if the latter, indicate that FPR registers are needed.

   Also look for LHS = (*ptr).field, where ptr is one of the forms
   listed above.

   On alpha, cfun->va_list_gpr_size is used as size of the needed
   regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
   registers are needed and bit 1 set if FPR registers are needed.
   Return true if va_list references should not be scanned for the
   current statement.  */

static bool
alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
{
  tree base, offset, rhs;
  int offset_arg = 1;
  gimple base_stmt;

  if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
      != GIMPLE_SINGLE_RHS)
    return false;

  rhs = gimple_assign_rhs1 (stmt);
  while (handled_component_p (rhs))
    rhs = TREE_OPERAND (rhs, 0);
  if (TREE_CODE (rhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
    return false;

  stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
  if (stmt == NULL
      || !is_gimple_assign (stmt)
      || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
    return false;

  base = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (base) == SSA_NAME)
    {
      base_stmt = va_list_skip_additions (base);
      if (base_stmt
	  && is_gimple_assign (base_stmt)
	  && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
	base = gimple_assign_rhs1 (base_stmt);
    }

  if (TREE_CODE (base) != COMPONENT_REF
      || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
    {
      base = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (base) == SSA_NAME)
	{
	  base_stmt = va_list_skip_additions (base);
	  if (base_stmt
	      && is_gimple_assign (base_stmt)
	      && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
	    base = gimple_assign_rhs1 (base_stmt);
	}

      if (TREE_CODE (base) != COMPONENT_REF
	  || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
	return false;

      offset_arg = 0;
    }

  base = get_base_address (base);
  if (TREE_CODE (base) != VAR_DECL
      || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
    return false;
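
  /* The PHI recognized below typically comes from gimplification of
     the REAL_TYPE branch of alpha_gimplify_va_arg_1 (illustrative
     shape):

       off = PHI <ap.__offset + cst - 48, ap.__offset + cst>

     i.e. one argument carries the 6*8-byte FP-register bias and the
     other does not.  */
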
  offset = gimple_op (stmt, 1 + offset_arg);
  if (TREE_CODE (offset) == SSA_NAME)
    {
      gimple offset_stmt = va_list_skip_additions (offset);

      if (offset_stmt
	  && gimple_code (offset_stmt) == GIMPLE_PHI)
	{
	  HOST_WIDE_INT sub;
	  gimple arg1_stmt, arg2_stmt;
	  tree arg1, arg2;
	  enum tree_code code1, code2;

	  if (gimple_phi_num_args (offset_stmt) != 2)
	    goto escapes;

	  arg1_stmt
	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
	  arg2_stmt
	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
	  if (arg1_stmt == NULL
	      || !is_gimple_assign (arg1_stmt)
	      || arg2_stmt == NULL
	      || !is_gimple_assign (arg2_stmt))
	    goto escapes;

	  code1 = gimple_assign_rhs_code (arg1_stmt);
	  code2 = gimple_assign_rhs_code (arg2_stmt);
	  if (code1 == COMPONENT_REF
	      && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
	    /* Do nothing.  */;
	  else if (code2 == COMPONENT_REF
		   && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
	    {
	      gimple tem = arg1_stmt;
	      code2 = code1;
	      arg1_stmt = arg2_stmt;
	      arg2_stmt = tem;
	    }
	  else
	    goto escapes;

	  if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
	    goto escapes;

	  sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
	  if (code2 == MINUS_EXPR)
	    sub = -sub;
	  if (sub < -48 || sub > -32)
	    goto escapes;

	  arg1 = gimple_assign_rhs1 (arg1_stmt);
	  arg2 = gimple_assign_rhs1 (arg2_stmt);
	  if (TREE_CODE (arg2) == SSA_NAME)
	    {
	      arg2_stmt = va_list_skip_additions (arg2);
	      if (arg2_stmt == NULL
		  || !is_gimple_assign (arg2_stmt)
		  || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
		goto escapes;
	      arg2 = gimple_assign_rhs1 (arg2_stmt);
	    }
	  if (arg1 != arg2)
	    goto escapes;

	  if (TREE_CODE (arg1) != COMPONENT_REF
	      || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
	      || get_base_address (arg1) != base)
	    goto escapes;

	  /* Need floating point regs.  */
	  cfun->va_list_fpr_size |= 2;
	  return false;
	}
      if (offset_stmt
	  && is_gimple_assign (offset_stmt)
	  && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
	offset = gimple_assign_rhs1 (offset_stmt);
    }
  if (TREE_CODE (offset) != COMPONENT_REF
      || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
      || get_base_address (offset) != base)
    goto escapes;
  else
    /* Need general regs.  */
    cfun->va_list_fpr_size |= 1;
  return false;

escapes:
  si->va_list_escapes = true;
  return false;
}
#endif
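
/* Example (illustrative): if every va_arg use in a function reads an
   integer type, only bit 0 of cfun->va_list_fpr_size is set by the
   hook above, and alpha_setup_incoming_varargs below can skip saving
   the six floating-point argument registers entirely.  */
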
*/ 6210 if (cum.num_args < 6) 6211 { 6212 if (!no_rtl) 6213 { 6214 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx); 6215 emit_insn (gen_arg_home ()); 6216 } 6217 *pretend_size = 7 * UNITS_PER_WORD; 6218 } 6219#else 6220 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but 6221 only push those that are remaining. However, if NO registers need to 6222 be saved, don't allocate any space. This is not only because we won't 6223 need the space, but because AP includes the current_pretend_args_size 6224 and we don't want to mess up any ap-relative addresses already made. 6225 6226 If we are not to use the floating-point registers, save the integer 6227 registers where we would put the floating-point registers. This is 6228 not the most efficient way to implement varargs with just one register 6229 class, but it isn't worth doing anything more efficient in this rare 6230 case. */ 6231 if (cum >= 6) 6232 return; 6233 6234 if (!no_rtl) 6235 { 6236 int count; 6237 alias_set_type set = get_varargs_alias_set (); 6238 rtx tmp; 6239 6240 count = cfun->va_list_gpr_size / UNITS_PER_WORD; 6241 if (count > 6 - cum) 6242 count = 6 - cum; 6243 6244 /* Detect whether integer registers or floating-point registers 6245 are needed by the detected va_arg statements. See above for 6246 how these values are computed. Note that the "escape" value 6247 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of 6248 these bits set. */ 6249 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); 6250 6251 if (cfun->va_list_fpr_size & 1) 6252 { 6253 tmp = gen_rtx_MEM (BLKmode, 6254 plus_constant (Pmode, virtual_incoming_args_rtx, 6255 (cum + 6) * UNITS_PER_WORD)); 6256 MEM_NOTRAP_P (tmp) = 1; 6257 set_mem_alias_set (tmp, set); 6258 move_block_from_reg (16 + cum, tmp, count); 6259 } 6260 6261 if (cfun->va_list_fpr_size & 2) 6262 { 6263 tmp = gen_rtx_MEM (BLKmode, 6264 plus_constant (Pmode, virtual_incoming_args_rtx, 6265 cum * UNITS_PER_WORD)); 6266 MEM_NOTRAP_P (tmp) = 1; 6267 set_mem_alias_set (tmp, set); 6268 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count); 6269 } 6270 } 6271 *pretend_size = 12 * UNITS_PER_WORD; 6272#endif 6273} 6274 6275static void 6276alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) 6277{ 6278 HOST_WIDE_INT offset; 6279 tree t, offset_field, base_field; 6280 6281 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) 6282 return; 6283 6284 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base 6285 up by 48, storing fp arg registers in the first 48 bytes, and the 6286 integer arg registers in the next 48 bytes. This is only done, 6287 however, if any integer registers need to be stored. 6288 6289 If no integer registers need be stored, then we must subtract 48 6290 in order to account for the integer arg registers which are counted 6291 in argsize above, but which are not actually stored on the stack. 6292 Must further be careful here about structures straddling the last 6293 integer argument register; that futzes with pretend_args_size, 6294 which changes the meaning of AP. */ 6295 6296 if (NUM_ARGS < 6) 6297 offset = TARGET_ABI_OPEN_VMS ? 
UNITS_PER_WORD : 6 * UNITS_PER_WORD; 6298 else 6299 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; 6300 6301 if (TARGET_ABI_OPEN_VMS) 6302 { 6303 t = make_tree (ptr_type_node, virtual_incoming_args_rtx); 6304 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD); 6305 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); 6306 TREE_SIDE_EFFECTS (t) = 1; 6307 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6308 } 6309 else 6310 { 6311 base_field = TYPE_FIELDS (TREE_TYPE (valist)); 6312 offset_field = DECL_CHAIN (base_field); 6313 6314 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), 6315 valist, base_field, NULL_TREE); 6316 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), 6317 valist, offset_field, NULL_TREE); 6318 6319 t = make_tree (ptr_type_node, virtual_incoming_args_rtx); 6320 t = fold_build_pointer_plus_hwi (t, offset); 6321 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); 6322 TREE_SIDE_EFFECTS (t) = 1; 6323 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6324 6325 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); 6326 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); 6327 TREE_SIDE_EFFECTS (t) = 1; 6328 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6329 } 6330} 6331 6332static tree 6333alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, 6334 gimple_seq *pre_p) 6335{ 6336 tree type_size, ptr_type, addend, t, addr; 6337 gimple_seq internal_post; 6338 6339 /* If the type could not be passed in registers, skip the block 6340 reserved for the registers. */ 6341 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) 6342 { 6343 t = build_int_cst (TREE_TYPE (offset), 6*8); 6344 gimplify_assign (offset, 6345 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), 6346 pre_p); 6347 } 6348 6349 addend = offset; 6350 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); 6351 6352 if (TREE_CODE (type) == COMPLEX_TYPE) 6353 { 6354 tree real_part, imag_part, real_temp; 6355 6356 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, 6357 offset, pre_p); 6358 6359 /* Copy the value into a new temporary, lest the formal temporary 6360 be reused out from under us. */ 6361 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); 6362 6363 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, 6364 offset, pre_p); 6365 6366 return build2 (COMPLEX_EXPR, type, real_temp, imag_part); 6367 } 6368 else if (TREE_CODE (type) == REAL_TYPE) 6369 { 6370 tree fpaddend, cond, fourtyeight; 6371 6372 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8); 6373 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), 6374 addend, fourtyeight); 6375 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); 6376 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, 6377 fpaddend, addend); 6378 } 6379 6380 /* Build the final address and force that value into a temporary. */ 6381 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); 6382 internal_post = NULL; 6383 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); 6384 gimple_seq_add_seq (pre_p, internal_post); 6385 6386 /* Update the offset field. 
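     The adjustment below rounds the argument size up to a whole
     number of 8-byte slots, so e.g. (illustrative value) a 12-byte
     record advances the offset by (12 + 7) / 8 * 8 = 16 bytes,
     keeping every slot quadword aligned.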
*/ 6387 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); 6388 if (type_size == NULL || TREE_OVERFLOW (type_size)) 6389 t = size_zero_node; 6390 else 6391 { 6392 t = size_binop (PLUS_EXPR, type_size, size_int (7)); 6393 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); 6394 t = size_binop (MULT_EXPR, t, size_int (8)); 6395 } 6396 t = fold_convert (TREE_TYPE (offset), t); 6397 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), 6398 pre_p); 6399 6400 return build_va_arg_indirect_ref (addr); 6401} 6402 6403static tree 6404alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 6405 gimple_seq *post_p) 6406{ 6407 tree offset_field, base_field, offset, base, t, r; 6408 bool indirect; 6409 6410 if (TARGET_ABI_OPEN_VMS) 6411 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6412 6413 base_field = TYPE_FIELDS (va_list_type_node); 6414 offset_field = DECL_CHAIN (base_field); 6415 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), 6416 valist, base_field, NULL_TREE); 6417 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), 6418 valist, offset_field, NULL_TREE); 6419 6420 /* Pull the fields of the structure out into temporaries. Since we never 6421 modify the base field, we can use a formal temporary. Sign-extend the 6422 offset field so that it's the proper width for pointer arithmetic. */ 6423 base = get_formal_tmp_var (base_field, pre_p); 6424 6425 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); 6426 offset = get_initialized_tmp_var (t, pre_p, NULL); 6427 6428 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); 6429 if (indirect) 6430 type = build_pointer_type_for_mode (type, ptr_mode, true); 6431 6432 /* Find the value. Note that this will be a stable indirection, or 6433 a composite of stable indirections in the case of complex. */ 6434 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p); 6435 6436 /* Stuff the offset temporary back into its field. */ 6437 gimplify_assign (unshare_expr (offset_field), 6438 fold_convert (TREE_TYPE (offset_field), offset), pre_p); 6439 6440 if (indirect) 6441 r = build_va_arg_indirect_ref (r); 6442 6443 return r; 6444} 6445 6446/* Builtins. 
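
   These implement the Alpha byte-manipulation instructions as well as
   the MAX (multimedia) and CIX (count) extensions as __builtin_alpha_*
   functions.  Purely as an illustration of their use (not code from
   this file), one might write

     unsigned long lo4 = __builtin_alpha_zapnot (v, 0x0f);
     unsigned long hi = __builtin_alpha_umulh (a, b);

   where the first keeps only the low four bytes of V and the second
   yields the high 64 bits of the unsigned 128-bit product A * B.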
*/ 6447 6448enum alpha_builtin 6449{ 6450 ALPHA_BUILTIN_CMPBGE, 6451 ALPHA_BUILTIN_EXTBL, 6452 ALPHA_BUILTIN_EXTWL, 6453 ALPHA_BUILTIN_EXTLL, 6454 ALPHA_BUILTIN_EXTQL, 6455 ALPHA_BUILTIN_EXTWH, 6456 ALPHA_BUILTIN_EXTLH, 6457 ALPHA_BUILTIN_EXTQH, 6458 ALPHA_BUILTIN_INSBL, 6459 ALPHA_BUILTIN_INSWL, 6460 ALPHA_BUILTIN_INSLL, 6461 ALPHA_BUILTIN_INSQL, 6462 ALPHA_BUILTIN_INSWH, 6463 ALPHA_BUILTIN_INSLH, 6464 ALPHA_BUILTIN_INSQH, 6465 ALPHA_BUILTIN_MSKBL, 6466 ALPHA_BUILTIN_MSKWL, 6467 ALPHA_BUILTIN_MSKLL, 6468 ALPHA_BUILTIN_MSKQL, 6469 ALPHA_BUILTIN_MSKWH, 6470 ALPHA_BUILTIN_MSKLH, 6471 ALPHA_BUILTIN_MSKQH, 6472 ALPHA_BUILTIN_UMULH, 6473 ALPHA_BUILTIN_ZAP, 6474 ALPHA_BUILTIN_ZAPNOT, 6475 ALPHA_BUILTIN_AMASK, 6476 ALPHA_BUILTIN_IMPLVER, 6477 ALPHA_BUILTIN_RPCC, 6478 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, 6479 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 6480 6481 /* TARGET_MAX */ 6482 ALPHA_BUILTIN_MINUB8, 6483 ALPHA_BUILTIN_MINSB8, 6484 ALPHA_BUILTIN_MINUW4, 6485 ALPHA_BUILTIN_MINSW4, 6486 ALPHA_BUILTIN_MAXUB8, 6487 ALPHA_BUILTIN_MAXSB8, 6488 ALPHA_BUILTIN_MAXUW4, 6489 ALPHA_BUILTIN_MAXSW4, 6490 ALPHA_BUILTIN_PERR, 6491 ALPHA_BUILTIN_PKLB, 6492 ALPHA_BUILTIN_PKWB, 6493 ALPHA_BUILTIN_UNPKBL, 6494 ALPHA_BUILTIN_UNPKBW, 6495 6496 /* TARGET_CIX */ 6497 ALPHA_BUILTIN_CTTZ, 6498 ALPHA_BUILTIN_CTLZ, 6499 ALPHA_BUILTIN_CTPOP, 6500 6501 ALPHA_BUILTIN_max 6502}; 6503 6504static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = { 6505 CODE_FOR_builtin_cmpbge, 6506 CODE_FOR_extbl, 6507 CODE_FOR_extwl, 6508 CODE_FOR_extll, 6509 CODE_FOR_extql, 6510 CODE_FOR_extwh, 6511 CODE_FOR_extlh, 6512 CODE_FOR_extqh, 6513 CODE_FOR_builtin_insbl, 6514 CODE_FOR_builtin_inswl, 6515 CODE_FOR_builtin_insll, 6516 CODE_FOR_insql, 6517 CODE_FOR_inswh, 6518 CODE_FOR_inslh, 6519 CODE_FOR_insqh, 6520 CODE_FOR_mskbl, 6521 CODE_FOR_mskwl, 6522 CODE_FOR_mskll, 6523 CODE_FOR_mskql, 6524 CODE_FOR_mskwh, 6525 CODE_FOR_msklh, 6526 CODE_FOR_mskqh, 6527 CODE_FOR_umuldi3_highpart, 6528 CODE_FOR_builtin_zap, 6529 CODE_FOR_builtin_zapnot, 6530 CODE_FOR_builtin_amask, 6531 CODE_FOR_builtin_implver, 6532 CODE_FOR_builtin_rpcc, 6533 CODE_FOR_builtin_establish_vms_condition_handler, 6534 CODE_FOR_builtin_revert_vms_condition_handler, 6535 6536 /* TARGET_MAX */ 6537 CODE_FOR_builtin_minub8, 6538 CODE_FOR_builtin_minsb8, 6539 CODE_FOR_builtin_minuw4, 6540 CODE_FOR_builtin_minsw4, 6541 CODE_FOR_builtin_maxub8, 6542 CODE_FOR_builtin_maxsb8, 6543 CODE_FOR_builtin_maxuw4, 6544 CODE_FOR_builtin_maxsw4, 6545 CODE_FOR_builtin_perr, 6546 CODE_FOR_builtin_pklb, 6547 CODE_FOR_builtin_pkwb, 6548 CODE_FOR_builtin_unpkbl, 6549 CODE_FOR_builtin_unpkbw, 6550 6551 /* TARGET_CIX */ 6552 CODE_FOR_ctzdi2, 6553 CODE_FOR_clzdi2, 6554 CODE_FOR_popcountdi2 6555}; 6556 6557struct alpha_builtin_def 6558{ 6559 const char *name; 6560 enum alpha_builtin code; 6561 unsigned int target_mask; 6562 bool is_const; 6563}; 6564 6565static struct alpha_builtin_def const zero_arg_builtins[] = { 6566 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true }, 6567 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false } 6568}; 6569 6570static struct alpha_builtin_def const one_arg_builtins[] = { 6571 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true }, 6572 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true }, 6573 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true }, 6574 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true }, 6575 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true }, 6576 { 
"__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true }, 6577 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true }, 6578 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true } 6579}; 6580 6581static struct alpha_builtin_def const two_arg_builtins[] = { 6582 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true }, 6583 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true }, 6584 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true }, 6585 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true }, 6586 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true }, 6587 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true }, 6588 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true }, 6589 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true }, 6590 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true }, 6591 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true }, 6592 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true }, 6593 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true }, 6594 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true }, 6595 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true }, 6596 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true }, 6597 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true }, 6598 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true }, 6599 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true }, 6600 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true }, 6601 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true }, 6602 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true }, 6603 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true }, 6604 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true }, 6605 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true }, 6606 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true }, 6607 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true }, 6608 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true }, 6609 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true }, 6610 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true }, 6611 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true }, 6612 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true }, 6613 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true }, 6614 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true }, 6615 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true } 6616}; 6617 6618static GTY(()) tree alpha_dimode_u; 6619static GTY(()) tree alpha_v8qi_u; 6620static GTY(()) tree alpha_v8qi_s; 6621static GTY(()) tree alpha_v4hi_u; 6622static GTY(()) tree alpha_v4hi_s; 6623 6624static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max]; 6625 6626/* Return the alpha builtin for CODE. */ 6627 6628static tree 6629alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 6630{ 6631 if (code >= ALPHA_BUILTIN_max) 6632 return error_mark_node; 6633 return alpha_builtins[code]; 6634} 6635 6636/* Helper function of alpha_init_builtins. Add the built-in specified 6637 by NAME, TYPE, CODE, and ECF. 
*/ 6638 6639static void 6640alpha_builtin_function (const char *name, tree ftype, 6641 enum alpha_builtin code, unsigned ecf) 6642{ 6643 tree decl = add_builtin_function (name, ftype, (int) code, 6644 BUILT_IN_MD, NULL, NULL_TREE); 6645 6646 if (ecf & ECF_CONST) 6647 TREE_READONLY (decl) = 1; 6648 if (ecf & ECF_NOTHROW) 6649 TREE_NOTHROW (decl) = 1; 6650 6651 alpha_builtins [(int) code] = decl; 6652} 6653 6654/* Helper function of alpha_init_builtins. Add the COUNT built-in 6655 functions pointed to by P, with function type FTYPE. */ 6656 6657static void 6658alpha_add_builtins (const struct alpha_builtin_def *p, size_t count, 6659 tree ftype) 6660{ 6661 size_t i; 6662 6663 for (i = 0; i < count; ++i, ++p) 6664 if ((target_flags & p->target_mask) == p->target_mask) 6665 alpha_builtin_function (p->name, ftype, p->code, 6666 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW); 6667} 6668 6669static void 6670alpha_init_builtins (void) 6671{ 6672 tree ftype; 6673 6674 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1); 6675 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); 6676 alpha_v8qi_s = build_vector_type (intQI_type_node, 8); 6677 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); 6678 alpha_v4hi_s = build_vector_type (intHI_type_node, 4); 6679 6680 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE); 6681 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype); 6682 6683 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE); 6684 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype); 6685 6686 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, 6687 alpha_dimode_u, NULL_TREE); 6688 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype); 6689 6690 if (TARGET_ABI_OPEN_VMS) 6691 { 6692 ftype = build_function_type_list (ptr_type_node, ptr_type_node, 6693 NULL_TREE); 6694 alpha_builtin_function ("__builtin_establish_vms_condition_handler", 6695 ftype, 6696 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, 6697 0); 6698 6699 ftype = build_function_type_list (ptr_type_node, void_type_node, 6700 NULL_TREE); 6701 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype, 6702 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0); 6703 6704 vms_patch_builtins (); 6705 } 6706} 6707 6708/* Expand an expression EXP that calls a built-in function, 6709 with result going to TARGET if that's convenient 6710 (and in mode MODE if that's convenient). 6711 SUBTARGET may be used as the target for computing one of EXP's operands. 6712 IGNORE is nonzero if the value is to be ignored. 
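
   As a sketch of the flow (using __builtin_alpha_umulh purely as an
   example): the function code indexes code_for_builtin[], giving
   CODE_FOR_umuldi3_highpart; each argument is expanded and, if need
   be, copied into a register to satisfy the predicate of the
   corresponding insn operand; the insn is then generated through
   GEN_FCN and emitted.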
*/ 6713 6714static rtx 6715alpha_expand_builtin (tree exp, rtx target, 6716 rtx subtarget ATTRIBUTE_UNUSED, 6717 machine_mode mode ATTRIBUTE_UNUSED, 6718 int ignore ATTRIBUTE_UNUSED) 6719{ 6720#define MAX_ARGS 2 6721 6722 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 6723 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 6724 tree arg; 6725 call_expr_arg_iterator iter; 6726 enum insn_code icode; 6727 rtx op[MAX_ARGS], pat; 6728 int arity; 6729 bool nonvoid; 6730 6731 if (fcode >= ALPHA_BUILTIN_max) 6732 internal_error ("bad builtin fcode"); 6733 icode = code_for_builtin[fcode]; 6734 if (icode == 0) 6735 internal_error ("bad builtin fcode"); 6736 6737 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 6738 6739 arity = 0; 6740 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) 6741 { 6742 const struct insn_operand_data *insn_op; 6743 6744 if (arg == error_mark_node) 6745 return NULL_RTX; 6746 if (arity > MAX_ARGS) 6747 return NULL_RTX; 6748 6749 insn_op = &insn_data[icode].operand[arity + nonvoid]; 6750 6751 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); 6752 6753 if (!(*insn_op->predicate) (op[arity], insn_op->mode)) 6754 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); 6755 arity++; 6756 } 6757 6758 if (nonvoid) 6759 { 6760 machine_mode tmode = insn_data[icode].operand[0].mode; 6761 if (!target 6762 || GET_MODE (target) != tmode 6763 || !(*insn_data[icode].operand[0].predicate) (target, tmode)) 6764 target = gen_reg_rtx (tmode); 6765 } 6766 6767 switch (arity) 6768 { 6769 case 0: 6770 pat = GEN_FCN (icode) (target); 6771 break; 6772 case 1: 6773 if (nonvoid) 6774 pat = GEN_FCN (icode) (target, op[0]); 6775 else 6776 pat = GEN_FCN (icode) (op[0]); 6777 break; 6778 case 2: 6779 pat = GEN_FCN (icode) (target, op[0], op[1]); 6780 break; 6781 default: 6782 gcc_unreachable (); 6783 } 6784 if (!pat) 6785 return NULL_RTX; 6786 emit_insn (pat); 6787 6788 if (nonvoid) 6789 return target; 6790 else 6791 return const0_rtx; 6792} 6793 6794 6795/* Several bits below assume HWI >= 64 bits. This should be enforced 6796 by config.gcc. */ 6797#if HOST_BITS_PER_WIDE_INT < 64 6798# error "HOST_WIDE_INT too small" 6799#endif 6800 6801/* Fold the builtin for the CMPBGE instruction. This is a vector comparison 6802 with an 8-bit output vector. OPINT contains the integer operands; bit N 6803 of OP_CONST is set if OPINT[N] is valid. */ 6804 6805static tree 6806alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) 6807{ 6808 if (op_const == 3) 6809 { 6810 int i, val; 6811 for (i = 0, val = 0; i < 8; ++i) 6812 { 6813 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; 6814 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; 6815 if (c0 >= c1) 6816 val |= 1 << i; 6817 } 6818 return build_int_cst (alpha_dimode_u, val); 6819 } 6820 else if (op_const == 2 && opint[1] == 0) 6821 return build_int_cst (alpha_dimode_u, 0xff); 6822 return NULL; 6823} 6824 6825/* Fold the builtin for the ZAPNOT instruction. This is essentially a 6826 specialized form of an AND operation. Other byte manipulation instructions 6827 are defined in terms of this instruction, so this is also used as a 6828 subroutine for other builtins. 6829 6830 OP contains the tree operands; OPINT contains the extracted integer values. 6831 Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only 6832 OPINT may be considered. 
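
   For example (constant mask chosen only for illustration): a mask of
   0x0f selects bytes 0-3, which expands to 0x00000000ffffffff below,
   so

     __builtin_alpha_zapnot (x, 0x0f)

   folds to x & 0xffffffff, and if X is constant as well the AND
   itself is performed at compile time.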
*/ 6833 6834static tree 6835alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], 6836 long op_const) 6837{ 6838 if (op_const & 2) 6839 { 6840 unsigned HOST_WIDE_INT mask = 0; 6841 int i; 6842 6843 for (i = 0; i < 8; ++i) 6844 if ((opint[1] >> i) & 1) 6845 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8); 6846 6847 if (op_const & 1) 6848 return build_int_cst (alpha_dimode_u, opint[0] & mask); 6849 6850 if (op) 6851 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0], 6852 build_int_cst (alpha_dimode_u, mask)); 6853 } 6854 else if ((op_const & 1) && opint[0] == 0) 6855 return build_int_cst (alpha_dimode_u, 0); 6856 return NULL; 6857} 6858 6859/* Fold the builtins for the EXT family of instructions. */ 6860 6861static tree 6862alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], 6863 long op_const, unsigned HOST_WIDE_INT bytemask, 6864 bool is_high) 6865{ 6866 long zap_const = 2; 6867 tree *zap_op = NULL; 6868 6869 if (op_const & 2) 6870 { 6871 unsigned HOST_WIDE_INT loc; 6872 6873 loc = opint[1] & 7; 6874 loc *= BITS_PER_UNIT; 6875 6876 if (loc != 0) 6877 { 6878 if (op_const & 1) 6879 { 6880 unsigned HOST_WIDE_INT temp = opint[0]; 6881 if (is_high) 6882 temp <<= loc; 6883 else 6884 temp >>= loc; 6885 opint[0] = temp; 6886 zap_const = 3; 6887 } 6888 } 6889 else 6890 zap_op = op; 6891 } 6892 6893 opint[1] = bytemask; 6894 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const); 6895} 6896 6897/* Fold the builtins for the INS family of instructions. */ 6898 6899static tree 6900alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], 6901 long op_const, unsigned HOST_WIDE_INT bytemask, 6902 bool is_high) 6903{ 6904 if ((op_const & 1) && opint[0] == 0) 6905 return build_int_cst (alpha_dimode_u, 0); 6906 6907 if (op_const & 2) 6908 { 6909 unsigned HOST_WIDE_INT temp, loc, byteloc; 6910 tree *zap_op = NULL; 6911 6912 loc = opint[1] & 7; 6913 bytemask <<= loc; 6914 6915 temp = opint[0]; 6916 if (is_high) 6917 { 6918 byteloc = (64 - (loc * 8)) & 0x3f; 6919 if (byteloc == 0) 6920 zap_op = op; 6921 else 6922 temp >>= byteloc; 6923 bytemask >>= 8; 6924 } 6925 else 6926 { 6927 byteloc = loc * 8; 6928 if (byteloc == 0) 6929 zap_op = op; 6930 else 6931 temp <<= byteloc; 6932 } 6933 6934 opint[0] = temp; 6935 opint[1] = bytemask; 6936 return alpha_fold_builtin_zapnot (zap_op, opint, op_const); 6937 } 6938 6939 return NULL; 6940} 6941 6942static tree 6943alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], 6944 long op_const, unsigned HOST_WIDE_INT bytemask, 6945 bool is_high) 6946{ 6947 if (op_const & 2) 6948 { 6949 unsigned HOST_WIDE_INT loc; 6950 6951 loc = opint[1] & 7; 6952 bytemask <<= loc; 6953 6954 if (is_high) 6955 bytemask >>= 8; 6956 6957 opint[1] = bytemask ^ 0xff; 6958 } 6959 6960 return alpha_fold_builtin_zapnot (op, opint, op_const); 6961} 6962 6963static tree 6964alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) 6965{ 6966 tree op0 = fold_convert (vtype, op[0]); 6967 tree op1 = fold_convert (vtype, op[1]); 6968 tree val = fold_build2 (code, vtype, op0, op1); 6969 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val); 6970} 6971 6972static tree 6973alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) 6974{ 6975 unsigned HOST_WIDE_INT temp = 0; 6976 int i; 6977 6978 if (op_const != 3) 6979 return NULL; 6980 6981 for (i = 0; i < 8; ++i) 6982 { 6983 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; 6984 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; 6985 if (a >= b) 
6986 temp += a - b; 6987 else 6988 temp += b - a; 6989 } 6990 6991 return build_int_cst (alpha_dimode_u, temp); 6992} 6993 6994static tree 6995alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) 6996{ 6997 unsigned HOST_WIDE_INT temp; 6998 6999 if (op_const == 0) 7000 return NULL; 7001 7002 temp = opint[0] & 0xff; 7003 temp |= (opint[0] >> 24) & 0xff00; 7004 7005 return build_int_cst (alpha_dimode_u, temp); 7006} 7007 7008static tree 7009alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) 7010{ 7011 unsigned HOST_WIDE_INT temp; 7012 7013 if (op_const == 0) 7014 return NULL; 7015 7016 temp = opint[0] & 0xff; 7017 temp |= (opint[0] >> 8) & 0xff00; 7018 temp |= (opint[0] >> 16) & 0xff0000; 7019 temp |= (opint[0] >> 24) & 0xff000000; 7020 7021 return build_int_cst (alpha_dimode_u, temp); 7022} 7023 7024static tree 7025alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) 7026{ 7027 unsigned HOST_WIDE_INT temp; 7028 7029 if (op_const == 0) 7030 return NULL; 7031 7032 temp = opint[0] & 0xff; 7033 temp |= (opint[0] & 0xff00) << 24; 7034 7035 return build_int_cst (alpha_dimode_u, temp); 7036} 7037 7038static tree 7039alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) 7040{ 7041 unsigned HOST_WIDE_INT temp; 7042 7043 if (op_const == 0) 7044 return NULL; 7045 7046 temp = opint[0] & 0xff; 7047 temp |= (opint[0] & 0x0000ff00) << 8; 7048 temp |= (opint[0] & 0x00ff0000) << 16; 7049 temp |= (opint[0] & 0xff000000) << 24; 7050 7051 return build_int_cst (alpha_dimode_u, temp); 7052} 7053 7054static tree 7055alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) 7056{ 7057 unsigned HOST_WIDE_INT temp; 7058 7059 if (op_const == 0) 7060 return NULL; 7061 7062 if (opint[0] == 0) 7063 temp = 64; 7064 else 7065 temp = exact_log2 (opint[0] & -opint[0]); 7066 7067 return build_int_cst (alpha_dimode_u, temp); 7068} 7069 7070static tree 7071alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) 7072{ 7073 unsigned HOST_WIDE_INT temp; 7074 7075 if (op_const == 0) 7076 return NULL; 7077 7078 if (opint[0] == 0) 7079 temp = 64; 7080 else 7081 temp = 64 - floor_log2 (opint[0]) - 1; 7082 7083 return build_int_cst (alpha_dimode_u, temp); 7084} 7085 7086static tree 7087alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) 7088{ 7089 unsigned HOST_WIDE_INT temp, op; 7090 7091 if (op_const == 0) 7092 return NULL; 7093 7094 op = opint[0]; 7095 temp = 0; 7096 while (op) 7097 temp++, op &= op - 1; 7098 7099 return build_int_cst (alpha_dimode_u, temp); 7100} 7101 7102/* Fold one of our builtin functions. 
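   We dispatch on the function code and hand the operands to the
   helpers above.  E.g. (values illustrative only), ZAP is reduced to
   ZAPNOT by complementing the low eight bits of the mask, so
   __builtin_alpha_zap (x, 0xf0) is folded exactly like
   __builtin_alpha_zapnot (x, 0x0f).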
*/ 7103 7104static tree 7105alpha_fold_builtin (tree fndecl, int n_args, tree *op, 7106 bool ignore ATTRIBUTE_UNUSED) 7107{ 7108 unsigned HOST_WIDE_INT opint[MAX_ARGS]; 7109 long op_const = 0; 7110 int i; 7111 7112 if (n_args > MAX_ARGS) 7113 return NULL; 7114 7115 for (i = 0; i < n_args; i++) 7116 { 7117 tree arg = op[i]; 7118 if (arg == error_mark_node) 7119 return NULL; 7120 7121 opint[i] = 0; 7122 if (TREE_CODE (arg) == INTEGER_CST) 7123 { 7124 op_const |= 1L << i; 7125 opint[i] = int_cst_value (arg); 7126 } 7127 } 7128 7129 switch (DECL_FUNCTION_CODE (fndecl)) 7130 { 7131 case ALPHA_BUILTIN_CMPBGE: 7132 return alpha_fold_builtin_cmpbge (opint, op_const); 7133 7134 case ALPHA_BUILTIN_EXTBL: 7135 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false); 7136 case ALPHA_BUILTIN_EXTWL: 7137 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false); 7138 case ALPHA_BUILTIN_EXTLL: 7139 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false); 7140 case ALPHA_BUILTIN_EXTQL: 7141 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false); 7142 case ALPHA_BUILTIN_EXTWH: 7143 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true); 7144 case ALPHA_BUILTIN_EXTLH: 7145 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true); 7146 case ALPHA_BUILTIN_EXTQH: 7147 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true); 7148 7149 case ALPHA_BUILTIN_INSBL: 7150 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false); 7151 case ALPHA_BUILTIN_INSWL: 7152 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false); 7153 case ALPHA_BUILTIN_INSLL: 7154 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false); 7155 case ALPHA_BUILTIN_INSQL: 7156 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false); 7157 case ALPHA_BUILTIN_INSWH: 7158 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true); 7159 case ALPHA_BUILTIN_INSLH: 7160 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true); 7161 case ALPHA_BUILTIN_INSQH: 7162 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true); 7163 7164 case ALPHA_BUILTIN_MSKBL: 7165 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false); 7166 case ALPHA_BUILTIN_MSKWL: 7167 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false); 7168 case ALPHA_BUILTIN_MSKLL: 7169 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); 7170 case ALPHA_BUILTIN_MSKQL: 7171 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false); 7172 case ALPHA_BUILTIN_MSKWH: 7173 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true); 7174 case ALPHA_BUILTIN_MSKLH: 7175 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); 7176 case ALPHA_BUILTIN_MSKQH: 7177 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true); 7178 7179 case ALPHA_BUILTIN_ZAP: 7180 opint[1] ^= 0xff; 7181 /* FALLTHRU */ 7182 case ALPHA_BUILTIN_ZAPNOT: 7183 return alpha_fold_builtin_zapnot (op, opint, op_const); 7184 7185 case ALPHA_BUILTIN_MINUB8: 7186 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u); 7187 case ALPHA_BUILTIN_MINSB8: 7188 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s); 7189 case ALPHA_BUILTIN_MINUW4: 7190 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u); 7191 case ALPHA_BUILTIN_MINSW4: 7192 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s); 7193 case ALPHA_BUILTIN_MAXUB8: 7194 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u); 7195 case 
ALPHA_BUILTIN_MAXSB8: 7196 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s); 7197 case ALPHA_BUILTIN_MAXUW4: 7198 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u); 7199 case ALPHA_BUILTIN_MAXSW4: 7200 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s); 7201 7202 case ALPHA_BUILTIN_PERR: 7203 return alpha_fold_builtin_perr (opint, op_const); 7204 case ALPHA_BUILTIN_PKLB: 7205 return alpha_fold_builtin_pklb (opint, op_const); 7206 case ALPHA_BUILTIN_PKWB: 7207 return alpha_fold_builtin_pkwb (opint, op_const); 7208 case ALPHA_BUILTIN_UNPKBL: 7209 return alpha_fold_builtin_unpkbl (opint, op_const); 7210 case ALPHA_BUILTIN_UNPKBW: 7211 return alpha_fold_builtin_unpkbw (opint, op_const); 7212 7213 case ALPHA_BUILTIN_CTTZ: 7214 return alpha_fold_builtin_cttz (opint, op_const); 7215 case ALPHA_BUILTIN_CTLZ: 7216 return alpha_fold_builtin_ctlz (opint, op_const); 7217 case ALPHA_BUILTIN_CTPOP: 7218 return alpha_fold_builtin_ctpop (opint, op_const); 7219 7220 case ALPHA_BUILTIN_AMASK: 7221 case ALPHA_BUILTIN_IMPLVER: 7222 case ALPHA_BUILTIN_RPCC: 7223 /* None of these are foldable at compile-time. */ 7224 default: 7225 return NULL; 7226 } 7227} 7228 7229bool 7230alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi) 7231{ 7232 bool changed = false; 7233 gimple stmt = gsi_stmt (*gsi); 7234 tree call = gimple_call_fn (stmt); 7235 gimple new_stmt = NULL; 7236 7237 if (call) 7238 { 7239 tree fndecl = gimple_call_fndecl (stmt); 7240 7241 if (fndecl) 7242 { 7243 tree arg0, arg1; 7244 7245 switch (DECL_FUNCTION_CODE (fndecl)) 7246 { 7247 case ALPHA_BUILTIN_UMULH: 7248 arg0 = gimple_call_arg (stmt, 0); 7249 arg1 = gimple_call_arg (stmt, 1); 7250 7251 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), 7252 MULT_HIGHPART_EXPR, arg0, arg1); 7253 break; 7254 default: 7255 break; 7256 } 7257 } 7258 } 7259 7260 if (new_stmt) 7261 { 7262 gsi_replace (gsi, new_stmt, true); 7263 changed = true; 7264 } 7265 7266 return changed; 7267} 7268 7269/* This page contains routines that are used to determine what the function 7270 prologue and epilogue code will do and write them out. */ 7271 7272/* Compute the size of the save area in the stack. */ 7273 7274/* These variables are used for communication between the following functions. 7275 They indicate various things about the current function being compiled 7276 that are used to tell what kind of prologue, epilogue and procedure 7277 descriptor to generate. */ 7278 7279/* Nonzero if we need a stack procedure. */ 7280enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2}; 7281static enum alpha_procedure_types alpha_procedure_type; 7282 7283/* Register number (either FP or SP) that is used to unwind the frame. */ 7284static int vms_unwind_regno; 7285 7286/* Register number used to save FP. We need not have one for RA since 7287 we don't modify it for register procedures. This is only defined 7288 for register frame procedures. */ 7289static int vms_save_fp_regno; 7290 7291/* Register number used to reference objects off our PV. */ 7292static int vms_base_regno; 7293 7294/* Compute register masks for saved registers. */ 7295 7296static void 7297alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) 7298{ 7299 unsigned long imask = 0; 7300 unsigned long fmask = 0; 7301 unsigned int i; 7302 7303 /* When outputting a thunk, we don't have valid register life info, 7304 but assemble_start_function wants to output .frame and .mask 7305 directives. 
*/ 7306 if (cfun->is_thunk) 7307 { 7308 *imaskP = 0; 7309 *fmaskP = 0; 7310 return; 7311 } 7312 7313 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) 7314 imask |= (1UL << HARD_FRAME_POINTER_REGNUM); 7315 7316 /* One for every register we have to save. */ 7317 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 7318 if (! fixed_regs[i] && ! call_used_regs[i] 7319 && df_regs_ever_live_p (i) && i != REG_RA) 7320 { 7321 if (i < 32) 7322 imask |= (1UL << i); 7323 else 7324 fmask |= (1UL << (i - 32)); 7325 } 7326 7327 /* We need to restore these for the handler. */ 7328 if (crtl->calls_eh_return) 7329 { 7330 for (i = 0; ; ++i) 7331 { 7332 unsigned regno = EH_RETURN_DATA_REGNO (i); 7333 if (regno == INVALID_REGNUM) 7334 break; 7335 imask |= 1UL << regno; 7336 } 7337 } 7338 7339 /* If any register spilled, then spill the return address also. */ 7340 /* ??? This is required by the Digital stack unwind specification 7341 and isn't needed if we're doing Dwarf2 unwinding. */ 7342 if (imask || fmask || alpha_ra_ever_killed ()) 7343 imask |= (1UL << REG_RA); 7344 7345 *imaskP = imask; 7346 *fmaskP = fmask; 7347} 7348 7349int 7350alpha_sa_size (void) 7351{ 7352 unsigned long mask[2]; 7353 int sa_size = 0; 7354 int i, j; 7355 7356 alpha_sa_mask (&mask[0], &mask[1]); 7357 7358 for (j = 0; j < 2; ++j) 7359 for (i = 0; i < 32; ++i) 7360 if ((mask[j] >> i) & 1) 7361 sa_size++; 7362 7363 if (TARGET_ABI_OPEN_VMS) 7364 { 7365 /* Start with a stack procedure if we make any calls (REG_RA used), or 7366 need a frame pointer, with a register procedure if we otherwise need 7367 at least a slot, and with a null procedure in other cases. */ 7368 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed) 7369 alpha_procedure_type = PT_STACK; 7370 else if (get_frame_size() != 0) 7371 alpha_procedure_type = PT_REGISTER; 7372 else 7373 alpha_procedure_type = PT_NULL; 7374 7375 /* Don't reserve space for saving FP & RA yet. Do that later after we've 7376 made the final decision on stack procedure vs register procedure. */ 7377 if (alpha_procedure_type == PT_STACK) 7378 sa_size -= 2; 7379 7380 /* Decide whether to refer to objects off our PV via FP or PV. 7381 If we need FP for something else or if we receive a nonlocal 7382 goto (which expects PV to contain the value), we must use PV. 7383 Otherwise, start by assuming we can use FP. */ 7384 7385 vms_base_regno 7386 = (frame_pointer_needed 7387 || cfun->has_nonlocal_label 7388 || alpha_procedure_type == PT_STACK 7389 || crtl->outgoing_args_size) 7390 ? REG_PV : HARD_FRAME_POINTER_REGNUM; 7391 7392 /* If we want to copy PV into FP, we need to find some register 7393 in which to save FP. */ 7394 7395 vms_save_fp_regno = -1; 7396 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM) 7397 for (i = 0; i < 32; i++) 7398 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i)) 7399 vms_save_fp_regno = i; 7400 7401 /* A VMS condition handler requires a stack procedure in our 7402 implementation. (not required by the calling standard). */ 7403 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER) 7404 || cfun->machine->uses_condition_handler) 7405 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK; 7406 else if (alpha_procedure_type == PT_NULL) 7407 vms_base_regno = REG_PV; 7408 7409 /* Stack unwinding should be done via FP unless we use it for PV. */ 7410 vms_unwind_regno = (vms_base_regno == REG_PV 7411 ? 
HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM); 7412 7413 /* If this is a stack procedure, allow space for saving FP, RA and 7414 a condition handler slot if needed. */ 7415 if (alpha_procedure_type == PT_STACK) 7416 sa_size += 2 + cfun->machine->uses_condition_handler; 7417 } 7418 else 7419 { 7420 /* Our size must be even (multiple of 16 bytes). */ 7421 if (sa_size & 1) 7422 sa_size++; 7423 } 7424 7425 return sa_size * 8; 7426} 7427 7428/* Define the offset between two registers, one to be eliminated, 7429 and the other its replacement, at the start of a routine. */ 7430 7431HOST_WIDE_INT 7432alpha_initial_elimination_offset (unsigned int from, 7433 unsigned int to ATTRIBUTE_UNUSED) 7434{ 7435 HOST_WIDE_INT ret; 7436 7437 ret = alpha_sa_size (); 7438 ret += ALPHA_ROUND (crtl->outgoing_args_size); 7439 7440 switch (from) 7441 { 7442 case FRAME_POINTER_REGNUM: 7443 break; 7444 7445 case ARG_POINTER_REGNUM: 7446 ret += (ALPHA_ROUND (get_frame_size () 7447 + crtl->args.pretend_args_size) 7448 - crtl->args.pretend_args_size); 7449 break; 7450 7451 default: 7452 gcc_unreachable (); 7453 } 7454 7455 return ret; 7456} 7457 7458#if TARGET_ABI_OPEN_VMS 7459 7460/* Worker function for TARGET_CAN_ELIMINATE. */ 7461 7462static bool 7463alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 7464{ 7465 /* We need the alpha_procedure_type to decide. Evaluate it now. */ 7466 alpha_sa_size (); 7467 7468 switch (alpha_procedure_type) 7469 { 7470 case PT_NULL: 7471 /* NULL procedures have no frame of their own and we only 7472 know how to resolve from the current stack pointer. */ 7473 return to == STACK_POINTER_REGNUM; 7474 7475 case PT_REGISTER: 7476 case PT_STACK: 7477 /* We always eliminate except to the stack pointer if there is no 7478 usable frame pointer at hand. */ 7479 return (to != STACK_POINTER_REGNUM 7480 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM); 7481 } 7482 7483 gcc_unreachable (); 7484} 7485 7486/* FROM is to be eliminated for TO. Return the offset so that TO+offset 7487 designates the same location as FROM. */ 7488 7489HOST_WIDE_INT 7490alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to) 7491{ 7492 /* The only possible attempts we ever expect are ARG or FRAME_PTR to 7493 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide 7494 on the proper computations and will need the register save area size 7495 in most cases. */ 7496 7497 HOST_WIDE_INT sa_size = alpha_sa_size (); 7498 7499 /* PT_NULL procedures have no frame of their own and we only allow 7500 elimination to the stack pointer. This is the argument pointer and we 7501 resolve the soft frame pointer to that as well. */ 7502 7503 if (alpha_procedure_type == PT_NULL) 7504 return 0; 7505 7506 /* For a PT_STACK procedure the frame layout looks as follows 7507 7508 -----> decreasing addresses 7509 7510 < size rounded up to 16 | likewise > 7511 --------------#------------------------------+++--------------+++-------# 7512 incoming args # pretended args | "frame" | regs sa | PV | outgoing args # 7513 --------------#---------------------------------------------------------# 7514 ^ ^ ^ ^ 7515 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR 7516 7517 7518 PT_REGISTER procedures are similar in that they may have a frame of their 7519 own. They have no regs-sa/pv/outgoing-args area. 7520 7521 We first compute offset to HARD_FRAME_PTR, then add what we need to get 7522 to STACK_PTR if need be. 
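
   A worked example with made-up sizes: for a PT_STACK procedure with
   sa_size = 24, the 8 byte PV save slot, a 40 byte frame, no
   pretended arguments and 16 bytes of outgoing args, FRAME_PTR
   resolves to HARD_FRAME_PTR + ALPHA_ROUND (24 + 8) = +32, ARG_PTR to
   HARD_FRAME_PTR + ALPHA_ROUND (32 + 40) = +80, and eliminating to
   STACK_PTR adds ALPHA_ROUND (16) = 16 more.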
*/ 7523 7524 { 7525 HOST_WIDE_INT offset; 7526 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0; 7527 7528 switch (from) 7529 { 7530 case FRAME_POINTER_REGNUM: 7531 offset = ALPHA_ROUND (sa_size + pv_save_size); 7532 break; 7533 case ARG_POINTER_REGNUM: 7534 offset = (ALPHA_ROUND (sa_size + pv_save_size 7535 + get_frame_size () 7536 + crtl->args.pretend_args_size) 7537 - crtl->args.pretend_args_size); 7538 break; 7539 default: 7540 gcc_unreachable (); 7541 } 7542 7543 if (to == STACK_POINTER_REGNUM) 7544 offset += ALPHA_ROUND (crtl->outgoing_args_size); 7545 7546 return offset; 7547 } 7548} 7549 7550#define COMMON_OBJECT "common_object" 7551 7552static tree 7553common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED, 7554 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, 7555 bool *no_add_attrs ATTRIBUTE_UNUSED) 7556{ 7557 tree decl = *node; 7558 gcc_assert (DECL_P (decl)); 7559 7560 DECL_COMMON (decl) = 1; 7561 return NULL_TREE; 7562} 7563 7564static const struct attribute_spec vms_attribute_table[] = 7565{ 7566 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, 7567 affects_type_identity } */ 7568 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false }, 7569 { NULL, 0, 0, false, false, false, NULL, false } 7570}; 7571 7572void 7573vms_output_aligned_decl_common(FILE *file, tree decl, const char *name, 7574 unsigned HOST_WIDE_INT size, 7575 unsigned int align) 7576{ 7577 tree attr = DECL_ATTRIBUTES (decl); 7578 fprintf (file, "%s", COMMON_ASM_OP); 7579 assemble_name (file, name); 7580 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size); 7581 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */ 7582 fprintf (file, ",%u", align / BITS_PER_UNIT); 7583 if (attr) 7584 { 7585 attr = lookup_attribute (COMMON_OBJECT, attr); 7586 if (attr) 7587 fprintf (file, ",%s", 7588 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr)))); 7589 } 7590 fputc ('\n', file); 7591} 7592 7593#undef COMMON_OBJECT 7594 7595#endif 7596 7597bool 7598alpha_find_lo_sum_using_gp (rtx insn) 7599{ 7600 subrtx_iterator::array_type array; 7601 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) 7602 { 7603 const_rtx x = *iter; 7604 if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx) 7605 return true; 7606 } 7607 return false; 7608} 7609 7610static int 7611alpha_does_function_need_gp (void) 7612{ 7613 rtx_insn *insn; 7614 7615 /* The GP being variable is an OSF abi thing. */ 7616 if (! TARGET_ABI_OSF) 7617 return 0; 7618 7619 /* We need the gp to load the address of __mcount. */ 7620 if (TARGET_PROFILING_NEEDS_GP && crtl->profile) 7621 return 1; 7622 7623 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */ 7624 if (cfun->is_thunk) 7625 return 1; 7626 7627 /* The nonlocal receiver pattern assumes that the gp is valid for 7628 the nested function. Reasonable because it's almost always set 7629 correctly already. For the cases where that's wrong, make sure 7630 the nested function loads its gp on entry. */ 7631 if (crtl->has_nonlocal_goto) 7632 return 1; 7633 7634 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. 7635 Even if we are a static function, we still need to do this in case 7636 our address is taken and passed to something like qsort. 
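
     E.g. (user code, for illustration only):

        static int cmp (const void *, const void *);
        ...
        qsort (a, n, sizeof *a, cmp);

     reaches cmp through an indirect jsr, which does not establish the
     gp on our behalf.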
*/ 7637 7638 push_topmost_sequence (); 7639 insn = get_insns (); 7640 pop_topmost_sequence (); 7641 7642 for (; insn; insn = NEXT_INSN (insn)) 7643 if (NONDEBUG_INSN_P (insn) 7644 && GET_CODE (PATTERN (insn)) != USE 7645 && GET_CODE (PATTERN (insn)) != CLOBBER 7646 && get_attr_usegp (insn)) 7647 return 1; 7648 7649 return 0; 7650} 7651 7652 7653/* Helper function to set RTX_FRAME_RELATED_P on instructions, including 7654 sequences. */ 7655 7656static rtx_insn * 7657set_frame_related_p (void) 7658{ 7659 rtx_insn *seq = get_insns (); 7660 rtx_insn *insn; 7661 7662 end_sequence (); 7663 7664 if (!seq) 7665 return NULL; 7666 7667 if (INSN_P (seq)) 7668 { 7669 insn = seq; 7670 while (insn != NULL_RTX) 7671 { 7672 RTX_FRAME_RELATED_P (insn) = 1; 7673 insn = NEXT_INSN (insn); 7674 } 7675 seq = emit_insn (seq); 7676 } 7677 else 7678 { 7679 seq = emit_insn (seq); 7680 RTX_FRAME_RELATED_P (seq) = 1; 7681 } 7682 return seq; 7683} 7684 7685#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) 7686 7687/* Generates a store with the proper unwind info attached. VALUE is 7688 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG 7689 contains SP+FRAME_BIAS, and that is the unwind info that should be 7690 generated. If FRAME_REG != VALUE, then VALUE is being stored on 7691 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ 7692 7693static void 7694emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, 7695 HOST_WIDE_INT base_ofs, rtx frame_reg) 7696{ 7697 rtx addr, mem; 7698 rtx_insn *insn; 7699 7700 addr = plus_constant (Pmode, base_reg, base_ofs); 7701 mem = gen_frame_mem (DImode, addr); 7702 7703 insn = emit_move_insn (mem, value); 7704 RTX_FRAME_RELATED_P (insn) = 1; 7705 7706 if (frame_bias || value != frame_reg) 7707 { 7708 if (frame_bias) 7709 { 7710 addr = plus_constant (Pmode, stack_pointer_rtx, 7711 frame_bias + base_ofs); 7712 mem = gen_rtx_MEM (DImode, addr); 7713 } 7714 7715 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 7716 gen_rtx_SET (VOIDmode, mem, frame_reg)); 7717 } 7718} 7719 7720static void 7721emit_frame_store (unsigned int regno, rtx base_reg, 7722 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs) 7723{ 7724 rtx reg = gen_rtx_REG (DImode, regno); 7725 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); 7726} 7727 7728/* Compute the frame size. SIZE is the size of the "naked" frame 7729 and SA_SIZE is the size of the register save area. */ 7730 7731static HOST_WIDE_INT 7732compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) 7733{ 7734 if (TARGET_ABI_OPEN_VMS) 7735 return ALPHA_ROUND (sa_size 7736 + (alpha_procedure_type == PT_STACK ? 8 : 0) 7737 + size 7738 + crtl->args.pretend_args_size); 7739 else 7740 return ALPHA_ROUND (crtl->outgoing_args_size) 7741 + sa_size 7742 + ALPHA_ROUND (size 7743 + crtl->args.pretend_args_size); 7744} 7745 7746/* Write function prologue. */ 7747 7748/* On vms we have two kinds of functions: 7749 7750 - stack frame (PROC_STACK) 7751 these are 'normal' functions with local vars and which are 7752 calling other functions 7753 - register frame (PROC_REGISTER) 7754 keeps all data in registers, needs no stack 7755 7756 We must pass this to the assembler so it can generate the 7757 proper pdsc (procedure descriptor) 7758 This is done with the '.pdesc' command. 7759 7760 On not-vms, we don't really differentiate between the two, as we can 7761 simply allocate stack without saving registers. */ 7762 7763void 7764alpha_expand_prologue (void) 7765{ 7766 /* Registers to save. 
*/ 7767 unsigned long imask = 0; 7768 unsigned long fmask = 0; 7769 /* Stack space needed for pushing registers clobbered by us. */ 7770 HOST_WIDE_INT sa_size, sa_bias; 7771 /* Complete stack size needed. */ 7772 HOST_WIDE_INT frame_size; 7773 /* Probed stack size; it additionally includes the size of 7774 the "reserve region" if any. */ 7775 HOST_WIDE_INT probed_size; 7776 /* Offset from base reg to register save area. */ 7777 HOST_WIDE_INT reg_offset; 7778 rtx sa_reg; 7779 int i; 7780 7781 sa_size = alpha_sa_size (); 7782 frame_size = compute_frame_size (get_frame_size (), sa_size); 7783 7784 if (flag_stack_usage_info) 7785 current_function_static_stack_size = frame_size; 7786 7787 if (TARGET_ABI_OPEN_VMS) 7788 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 7789 else 7790 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 7791 7792 alpha_sa_mask (&imask, &fmask); 7793 7794 /* Emit an insn to reload GP, if needed. */ 7795 if (TARGET_ABI_OSF) 7796 { 7797 alpha_function_needs_gp = alpha_does_function_need_gp (); 7798 if (alpha_function_needs_gp) 7799 emit_insn (gen_prologue_ldgp ()); 7800 } 7801 7802 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert 7803 the call to mcount ourselves, rather than having the linker do it 7804 magically in response to -pg. Since _mcount has special linkage, 7805 don't represent the call as a call. */ 7806 if (TARGET_PROFILING_NEEDS_GP && crtl->profile) 7807 emit_insn (gen_prologue_mcount ()); 7808 7809 /* Adjust the stack by the frame size. If the frame size is > 4096 7810 bytes, we need to be sure we probe somewhere in the first and last 7811 4096 bytes (we can probably get away without the latter test) and 7812 every 8192 bytes in between. If the frame size is > 32768, we 7813 do this in a loop. Otherwise, we generate the explicit probe 7814 instructions. 7815 7816 Note that we are only allowed to adjust sp once in the prologue. */ 7817 7818 probed_size = frame_size; 7819 if (flag_stack_check) 7820 probed_size += STACK_CHECK_PROTECT; 7821 7822 if (probed_size <= 32768) 7823 { 7824 if (probed_size > 4096) 7825 { 7826 int probed; 7827 7828 for (probed = 4096; probed < probed_size; probed += 8192) 7829 emit_insn (gen_probe_stack (GEN_INT (-probed))); 7830 7831 /* We only have to do this probe if we aren't saving registers or 7832 if we are probing beyond the frame because of -fstack-check. */ 7833 if ((sa_size == 0 && probed_size > probed - 4096) 7834 || flag_stack_check) 7835 emit_insn (gen_probe_stack (GEN_INT (-probed_size))); 7836 } 7837 7838 if (frame_size != 0) 7839 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 7840 GEN_INT (-frame_size)))); 7841 } 7842 else 7843 { 7844 /* Here we generate code to set R22 to SP + 4096 and set R23 to the 7845 number of 8192 byte blocks to probe. We then probe each block 7846 in the loop and then set SP to the proper location. If the 7847 amount remaining is > 4096, we have to do one more probe if we 7848 are not saving any registers or if we are probing beyond the 7849 frame because of -fstack-check. 
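
     A worked example (size chosen only for illustration): for
     probed_size = 100000, blocks = (100000 + 4096) / 8192 = 12, so
     the loop probes twelve 8192 byte blocks starting at SP + 4096,
     and leftover = 104096 - 12 * 8192 = 5792 is dealt with after the
     loop.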
*/ 7850 7851 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; 7852 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; 7853 rtx ptr = gen_rtx_REG (DImode, 22); 7854 rtx count = gen_rtx_REG (DImode, 23); 7855 rtx seq; 7856 7857 emit_move_insn (count, GEN_INT (blocks)); 7858 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); 7859 7860 /* Because of the difficulty in emitting a new basic block this 7861 late in the compilation, generate the loop as a single insn. */ 7862 emit_insn (gen_prologue_stack_probe_loop (count, ptr)); 7863 7864 if ((leftover > 4096 && sa_size == 0) || flag_stack_check) 7865 { 7866 rtx last = gen_rtx_MEM (DImode, 7867 plus_constant (Pmode, ptr, -leftover)); 7868 MEM_VOLATILE_P (last) = 1; 7869 emit_move_insn (last, const0_rtx); 7870 } 7871 7872 if (flag_stack_check) 7873 { 7874 /* If -fstack-check is specified we have to load the entire 7875 constant into a register and subtract from the sp in one go, 7876 because the probed stack size is not equal to the frame size. */ 7877 HOST_WIDE_INT lo, hi; 7878 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; 7879 hi = frame_size - lo; 7880 7881 emit_move_insn (ptr, GEN_INT (hi)); 7882 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); 7883 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, 7884 ptr)); 7885 } 7886 else 7887 { 7888 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr, 7889 GEN_INT (-leftover))); 7890 } 7891 7892 /* This alternative is special, because the DWARF code cannot 7893 possibly intuit through the loop above. So we invent this 7894 note it looks at instead. */ 7895 RTX_FRAME_RELATED_P (seq) = 1; 7896 add_reg_note (seq, REG_FRAME_RELATED_EXPR, 7897 gen_rtx_SET (VOIDmode, stack_pointer_rtx, 7898 plus_constant (Pmode, stack_pointer_rtx, 7899 -frame_size))); 7900 } 7901 7902 /* Cope with very large offsets to the register save area. */ 7903 sa_bias = 0; 7904 sa_reg = stack_pointer_rtx; 7905 if (reg_offset + sa_size > 0x8000) 7906 { 7907 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; 7908 rtx sa_bias_rtx; 7909 7910 if (low + sa_size <= 0x8000) 7911 sa_bias = reg_offset - low, reg_offset = low; 7912 else 7913 sa_bias = reg_offset, reg_offset = 0; 7914 7915 sa_reg = gen_rtx_REG (DImode, 24); 7916 sa_bias_rtx = GEN_INT (sa_bias); 7917 7918 if (add_operand (sa_bias_rtx, DImode)) 7919 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); 7920 else 7921 { 7922 emit_move_insn (sa_reg, sa_bias_rtx); 7923 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); 7924 } 7925 } 7926 7927 /* Save regs in stack order. Beginning with VMS PV. */ 7928 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) 7929 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0); 7930 7931 /* Save register RA next. */ 7932 if (imask & (1UL << REG_RA)) 7933 { 7934 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset); 7935 imask &= ~(1UL << REG_RA); 7936 reg_offset += 8; 7937 } 7938 7939 /* Now save any other registers required to be saved. */ 7940 for (i = 0; i < 31; i++) 7941 if (imask & (1UL << i)) 7942 { 7943 emit_frame_store (i, sa_reg, sa_bias, reg_offset); 7944 reg_offset += 8; 7945 } 7946 7947 for (i = 0; i < 31; i++) 7948 if (fmask & (1UL << i)) 7949 { 7950 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset); 7951 reg_offset += 8; 7952 } 7953 7954 if (TARGET_ABI_OPEN_VMS) 7955 { 7956 /* Register frame procedures save the fp. 
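         It goes in the call-used scratch register that alpha_sa_size
         chose as vms_save_fp_regno, annotated with a REG_CFA_REGISTER
         note for the unwinder.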
*/ 7957 if (alpha_procedure_type == PT_REGISTER) 7958 { 7959 rtx_insn *insn = 7960 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno), 7961 hard_frame_pointer_rtx); 7962 add_reg_note (insn, REG_CFA_REGISTER, NULL); 7963 RTX_FRAME_RELATED_P (insn) = 1; 7964 } 7965 7966 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV) 7967 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno), 7968 gen_rtx_REG (DImode, REG_PV))); 7969 7970 if (alpha_procedure_type != PT_NULL 7971 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM) 7972 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); 7973 7974 /* If we have to allocate space for outgoing args, do it now. */ 7975 if (crtl->outgoing_args_size != 0) 7976 { 7977 rtx_insn *seq 7978 = emit_move_insn (stack_pointer_rtx, 7979 plus_constant 7980 (Pmode, hard_frame_pointer_rtx, 7981 - (ALPHA_ROUND 7982 (crtl->outgoing_args_size)))); 7983 7984 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted 7985 if ! frame_pointer_needed. Setting the bit will change the CFA 7986 computation rule to use sp again, which would be wrong if we had 7987 frame_pointer_needed, as this means sp might move unpredictably 7988 later on. 7989 7990 Also, note that 7991 frame_pointer_needed 7992 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM 7993 and 7994 crtl->outgoing_args_size != 0 7995 => alpha_procedure_type != PT_NULL, 7996 7997 so when we are not setting the bit here, we are guaranteed to 7998 have emitted an FRP frame pointer update just before. */ 7999 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed; 8000 } 8001 } 8002 else 8003 { 8004 /* If we need a frame pointer, set it from the stack pointer. */ 8005 if (frame_pointer_needed) 8006 { 8007 if (TARGET_CAN_FAULT_IN_PROLOGUE) 8008 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); 8009 else 8010 /* This must always be the last instruction in the 8011 prologue, thus we emit a special move + clobber. */ 8012 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx, 8013 stack_pointer_rtx, sa_reg))); 8014 } 8015 } 8016 8017 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into 8018 the prologue, for exception handling reasons, we cannot do this for 8019 any insn that might fault. We could prevent this for mems with a 8020 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we 8021 have to prevent all such scheduling with a blockage. 8022 8023 Linux, on the other hand, never bothered to implement OSF/1's 8024 exception handling, and so doesn't care about such things. Anyone 8025 planning to use dwarf2 frame-unwind info can also omit the blockage. */ 8026 8027 if (! TARGET_CAN_FAULT_IN_PROLOGUE) 8028 emit_insn (gen_blockage ()); 8029} 8030 8031/* Count the number of .file directives, so that .loc is up to date. */ 8032int num_source_filenames = 0; 8033 8034/* Output the textual info surrounding the prologue. */ 8035 8036void 8037alpha_start_function (FILE *file, const char *fnname, 8038 tree decl ATTRIBUTE_UNUSED) 8039{ 8040 unsigned long imask = 0; 8041 unsigned long fmask = 0; 8042 /* Stack space needed for pushing registers clobbered by us. */ 8043 HOST_WIDE_INT sa_size; 8044 /* Complete stack size needed. */ 8045 unsigned HOST_WIDE_INT frame_size; 8046 /* The maximum debuggable frame size. */ 8047 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31; 8048 /* Offset from base reg to register save area. 
*/
  HOST_WIDE_INT reg_offset;
  char *entry_label = (char *) alloca (strlen (fnname) + 6);
  char *tramp_label = (char *) alloca (strlen (fnname) + 6);
  int i;

#if TARGET_ABI_OPEN_VMS
  vms_start_function (fnname);
#endif

  alpha_fnname = fnname;
  sa_size = alpha_sa_size ();
  frame_size = compute_frame_size (get_frame_size (), sa_size);

  if (TARGET_ABI_OPEN_VMS)
    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
  else
    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);

  alpha_sa_mask (&imask, &fmask);

  /* Issue function start and label.  */
  if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
    {
      fputs ("\t.ent ", file);
      assemble_name (file, fnname);
      putc ('\n', file);

      /* If the function needs GP, we'll write the "..ng" label there.
	 Otherwise, do it here.  */
      if (TARGET_ABI_OSF
          && ! alpha_function_needs_gp
	  && ! cfun->is_thunk)
	{
	  putc ('$', file);
	  assemble_name (file, fnname);
	  fputs ("..ng:\n", file);
	}
    }
  /* Nested functions on VMS that are potentially called via trampoline
     get a special transfer entry point that loads the called function's
     procedure descriptor and static chain.  */
   if (TARGET_ABI_OPEN_VMS
       && !TREE_PUBLIC (decl)
       && DECL_CONTEXT (decl)
       && !TYPE_P (DECL_CONTEXT (decl))
       && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
     {
	strcpy (tramp_label, fnname);
	strcat (tramp_label, "..tr");
	ASM_OUTPUT_LABEL (file, tramp_label);
	fprintf (file, "\tldq $1,24($27)\n");
	fprintf (file, "\tldq $27,16($27)\n");
     }

  strcpy (entry_label, fnname);
  if (TARGET_ABI_OPEN_VMS)
    strcat (entry_label, "..en");

  ASM_OUTPUT_LABEL (file, entry_label);
  inside_function = TRUE;

  if (TARGET_ABI_OPEN_VMS)
    fprintf (file, "\t.base $%d\n", vms_base_regno);

  if (TARGET_ABI_OSF
      && TARGET_IEEE_CONFORMANT
      && !flag_inhibit_size_directive)
    {
      /* Set flags in procedure descriptor to request IEEE-conformant
	 math-library routines.  The value we set it to is PDSC_EXC_IEEE
	 (/usr/include/pdsc.h).  */
      fputs ("\t.eflag 48\n", file);
    }

  /* Set up offsets to alpha virtual arg/local debugging pointer.  */
  alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
  alpha_arg_offset = -frame_size + 48;

  /* Describe our frame.  If the frame size is larger than an integer,
     print it as zero to avoid an assembler error.  We won't be
     properly describing such a frame, but that's the best we can do.  */
  if (TARGET_ABI_OPEN_VMS)
    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
	     HOST_WIDE_INT_PRINT_DEC "\n",
	     vms_unwind_regno,
	     frame_size >= (1UL << 31) ? 0 : frame_size,
	     reg_offset);
  else if (!flag_inhibit_size_directive)
    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
	     (frame_pointer_needed
	      ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
	     frame_size >= max_frame_size ? 0 : frame_size,
	     crtl->args.pretend_args_size);

  /* Describe which registers were spilled.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      if (imask)
	/* ??? Does VMS care if mask contains ra?  The old code didn't
	   set it, so I don't here.
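	   (Editorial note: REG_RA is register 26, so an imask of
	   0x0400fe00, say, comes out below as ".mask 0xfe00,0".)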
*/ 8149 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA)); 8150 if (fmask) 8151 fprintf (file, "\t.fmask 0x%lx,0\n", fmask); 8152 if (alpha_procedure_type == PT_REGISTER) 8153 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno); 8154 } 8155 else if (!flag_inhibit_size_directive) 8156 { 8157 if (imask) 8158 { 8159 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, 8160 frame_size >= max_frame_size ? 0 : reg_offset - frame_size); 8161 8162 for (i = 0; i < 32; ++i) 8163 if (imask & (1UL << i)) 8164 reg_offset += 8; 8165 } 8166 8167 if (fmask) 8168 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, 8169 frame_size >= max_frame_size ? 0 : reg_offset - frame_size); 8170 } 8171 8172#if TARGET_ABI_OPEN_VMS 8173 /* If a user condition handler has been installed at some point, emit 8174 the procedure descriptor bits to point the Condition Handling Facility 8175 at the indirection wrapper, and state the fp offset at which the user 8176 handler may be found. */ 8177 if (cfun->machine->uses_condition_handler) 8178 { 8179 fprintf (file, "\t.handler __gcc_shell_handler\n"); 8180 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET); 8181 } 8182 8183#ifdef TARGET_VMS_CRASH_DEBUG 8184 /* Support of minimal traceback info. */ 8185 switch_to_section (readonly_data_section); 8186 fprintf (file, "\t.align 3\n"); 8187 assemble_name (file, fnname); fputs ("..na:\n", file); 8188 fputs ("\t.ascii \"", file); 8189 assemble_name (file, fnname); 8190 fputs ("\\0\"\n", file); 8191 switch_to_section (text_section); 8192#endif 8193#endif /* TARGET_ABI_OPEN_VMS */ 8194} 8195 8196/* Emit the .prologue note at the scheduled end of the prologue. */ 8197 8198static void 8199alpha_output_function_end_prologue (FILE *file) 8200{ 8201 if (TARGET_ABI_OPEN_VMS) 8202 fputs ("\t.prologue\n", file); 8203 else if (!flag_inhibit_size_directive) 8204 fprintf (file, "\t.prologue %d\n", 8205 alpha_function_needs_gp || cfun->is_thunk); 8206} 8207 8208/* Write function epilogue. */ 8209 8210void 8211alpha_expand_epilogue (void) 8212{ 8213 /* Registers to save. */ 8214 unsigned long imask = 0; 8215 unsigned long fmask = 0; 8216 /* Stack space needed for pushing registers clobbered by us. */ 8217 HOST_WIDE_INT sa_size; 8218 /* Complete stack size needed. */ 8219 HOST_WIDE_INT frame_size; 8220 /* Offset from base reg to register save area. */ 8221 HOST_WIDE_INT reg_offset; 8222 int fp_is_frame_pointer, fp_offset; 8223 rtx sa_reg, sa_reg_exp = NULL; 8224 rtx sp_adj1, sp_adj2, mem, reg, insn; 8225 rtx eh_ofs; 8226 rtx cfa_restores = NULL_RTX; 8227 int i; 8228 8229 sa_size = alpha_sa_size (); 8230 frame_size = compute_frame_size (get_frame_size (), sa_size); 8231 8232 if (TARGET_ABI_OPEN_VMS) 8233 { 8234 if (alpha_procedure_type == PT_STACK) 8235 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 8236 else 8237 reg_offset = 0; 8238 } 8239 else 8240 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 8241 8242 alpha_sa_mask (&imask, &fmask); 8243 8244 fp_is_frame_pointer 8245 = (TARGET_ABI_OPEN_VMS 8246 ? alpha_procedure_type == PT_STACK 8247 : frame_pointer_needed); 8248 fp_offset = 0; 8249 sa_reg = stack_pointer_rtx; 8250 8251 if (crtl->calls_eh_return) 8252 eh_ofs = EH_RETURN_STACKADJ_RTX; 8253 else 8254 eh_ofs = NULL_RTX; 8255 8256 if (sa_size) 8257 { 8258 /* If we have a frame pointer, restore SP from it. */ 8259 if (TARGET_ABI_OPEN_VMS 8260 ? 
vms_unwind_regno == HARD_FRAME_POINTER_REGNUM 8261 : frame_pointer_needed) 8262 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 8263 8264 /* Cope with very large offsets to the register save area. */ 8265 if (reg_offset + sa_size > 0x8000) 8266 { 8267 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; 8268 HOST_WIDE_INT bias; 8269 8270 if (low + sa_size <= 0x8000) 8271 bias = reg_offset - low, reg_offset = low; 8272 else 8273 bias = reg_offset, reg_offset = 0; 8274 8275 sa_reg = gen_rtx_REG (DImode, 22); 8276 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); 8277 8278 emit_move_insn (sa_reg, sa_reg_exp); 8279 } 8280 8281 /* Restore registers in order, excepting a true frame pointer. */ 8282 8283 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset)); 8284 reg = gen_rtx_REG (DImode, REG_RA); 8285 emit_move_insn (reg, mem); 8286 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); 8287 8288 reg_offset += 8; 8289 imask &= ~(1UL << REG_RA); 8290 8291 for (i = 0; i < 31; ++i) 8292 if (imask & (1UL << i)) 8293 { 8294 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) 8295 fp_offset = reg_offset; 8296 else 8297 { 8298 mem = gen_frame_mem (DImode, 8299 plus_constant (Pmode, sa_reg, 8300 reg_offset)); 8301 reg = gen_rtx_REG (DImode, i); 8302 emit_move_insn (reg, mem); 8303 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, 8304 cfa_restores); 8305 } 8306 reg_offset += 8; 8307 } 8308 8309 for (i = 0; i < 31; ++i) 8310 if (fmask & (1UL << i)) 8311 { 8312 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg, 8313 reg_offset)); 8314 reg = gen_rtx_REG (DFmode, i+32); 8315 emit_move_insn (reg, mem); 8316 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); 8317 reg_offset += 8; 8318 } 8319 } 8320 8321 if (frame_size || eh_ofs) 8322 { 8323 sp_adj1 = stack_pointer_rtx; 8324 8325 if (eh_ofs) 8326 { 8327 sp_adj1 = gen_rtx_REG (DImode, 23); 8328 emit_move_insn (sp_adj1, 8329 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); 8330 } 8331 8332 /* If the stack size is large, begin computation into a temporary 8333 register so as not to interfere with a potential fp restore, 8334 which must be consecutive with an SP restore. */ 8335 if (frame_size < 32768 && !cfun->calls_alloca) 8336 sp_adj2 = GEN_INT (frame_size); 8337 else if (frame_size < 0x40007fffL) 8338 { 8339 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; 8340 8341 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); 8342 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) 8343 sp_adj1 = sa_reg; 8344 else 8345 { 8346 sp_adj1 = gen_rtx_REG (DImode, 23); 8347 emit_move_insn (sp_adj1, sp_adj2); 8348 } 8349 sp_adj2 = GEN_INT (low); 8350 } 8351 else 8352 { 8353 rtx tmp = gen_rtx_REG (DImode, 23); 8354 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false); 8355 if (!sp_adj2) 8356 { 8357 /* We can't drop new things to memory this late, afaik, 8358 so build it up by pieces. */ 8359 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size, 8360 -(frame_size < 0)); 8361 gcc_assert (sp_adj2); 8362 } 8363 } 8364 8365 /* From now on, things must be in order. So emit blockages. */ 8366 8367 /* Restore the frame pointer. 
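	 This load must stay ordered after the other register restores and
	 before the SP adjustment, hence the blockage emitted first.  The
	 fp comes either from its save slot or, for VMS register frame
	 procedures, from vms_save_fp_regno.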
*/ 8368 if (fp_is_frame_pointer) 8369 { 8370 emit_insn (gen_blockage ()); 8371 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, 8372 fp_offset)); 8373 emit_move_insn (hard_frame_pointer_rtx, mem); 8374 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, 8375 hard_frame_pointer_rtx, cfa_restores); 8376 } 8377 else if (TARGET_ABI_OPEN_VMS) 8378 { 8379 emit_insn (gen_blockage ()); 8380 emit_move_insn (hard_frame_pointer_rtx, 8381 gen_rtx_REG (DImode, vms_save_fp_regno)); 8382 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, 8383 hard_frame_pointer_rtx, cfa_restores); 8384 } 8385 8386 /* Restore the stack pointer. */ 8387 emit_insn (gen_blockage ()); 8388 if (sp_adj2 == const0_rtx) 8389 insn = emit_move_insn (stack_pointer_rtx, sp_adj1); 8390 else 8391 insn = emit_move_insn (stack_pointer_rtx, 8392 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)); 8393 REG_NOTES (insn) = cfa_restores; 8394 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); 8395 RTX_FRAME_RELATED_P (insn) = 1; 8396 } 8397 else 8398 { 8399 gcc_assert (cfa_restores == NULL); 8400 8401 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER) 8402 { 8403 emit_insn (gen_blockage ()); 8404 insn = emit_move_insn (hard_frame_pointer_rtx, 8405 gen_rtx_REG (DImode, vms_save_fp_regno)); 8406 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); 8407 RTX_FRAME_RELATED_P (insn) = 1; 8408 } 8409 } 8410} 8411 8412/* Output the rest of the textual info surrounding the epilogue. */ 8413 8414void 8415alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) 8416{ 8417 rtx_insn *insn; 8418 8419 /* We output a nop after noreturn calls at the very end of the function to 8420 ensure that the return address always remains in the caller's code range, 8421 as not doing so might confuse unwinding engines. */ 8422 insn = get_last_insn (); 8423 if (!INSN_P (insn)) 8424 insn = prev_active_insn (insn); 8425 if (insn && CALL_P (insn)) 8426 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); 8427 8428#if TARGET_ABI_OPEN_VMS 8429 /* Write the linkage entries. */ 8430 alpha_write_linkage (file, fnname); 8431#endif 8432 8433 /* End the function. */ 8434 if (TARGET_ABI_OPEN_VMS 8435 || !flag_inhibit_size_directive) 8436 { 8437 fputs ("\t.end ", file); 8438 assemble_name (file, fnname); 8439 putc ('\n', file); 8440 } 8441 inside_function = FALSE; 8442} 8443 8444#if TARGET_ABI_OSF 8445/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. 8446 8447 In order to avoid the hordes of differences between generated code 8448 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating 8449 lots of code loading up large constants, generate rtl and emit it 8450 instead of going straight to text. 8451 8452 Not sure why this idea hasn't been explored before... */ 8453 8454static void 8455alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 8456 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 8457 tree function) 8458{ 8459 HOST_WIDE_INT hi, lo; 8460 rtx this_rtx, funexp; 8461 rtx_insn *insn; 8462 8463 /* We always require a valid GP. */ 8464 emit_insn (gen_prologue_ldgp ()); 8465 emit_note (NOTE_INSN_PROLOGUE_END); 8466 8467 /* Find the "this" pointer. If the function returns a structure, 8468 the structure return pointer is in $16. */ 8469 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 8470 this_rtx = gen_rtx_REG (Pmode, 17); 8471 else 8472 this_rtx = gen_rtx_REG (Pmode, 16); 8473 8474 /* Add DELTA. When possible we use ldah+lda. 
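     (ldah adds a sign-extended 16-bit immediate scaled by 65536 and lda
     adds a sign-extended 16-bit immediate, so the hi/lo split below
     covers any delta for which hi + lo reproduces the value exactly;
     0x12345678, for example, splits into hi = 0x12340000, lo = 0x5678.)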
Otherwise load the 8475 entire constant for the add. */ 8476 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; 8477 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; 8478 if (hi + lo == delta) 8479 { 8480 if (hi) 8481 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); 8482 if (lo) 8483 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); 8484 } 8485 else 8486 { 8487 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), 8488 delta, -(delta < 0)); 8489 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 8490 } 8491 8492 /* Add a delta stored in the vtable at VCALL_OFFSET. */ 8493 if (vcall_offset) 8494 { 8495 rtx tmp, tmp2; 8496 8497 tmp = gen_rtx_REG (Pmode, 0); 8498 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); 8499 8500 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; 8501 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; 8502 if (hi + lo == vcall_offset) 8503 { 8504 if (hi) 8505 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); 8506 } 8507 else 8508 { 8509 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1), 8510 vcall_offset, -(vcall_offset < 0)); 8511 emit_insn (gen_adddi3 (tmp, tmp, tmp2)); 8512 lo = 0; 8513 } 8514 if (lo) 8515 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); 8516 else 8517 tmp2 = tmp; 8518 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); 8519 8520 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 8521 } 8522 8523 /* Generate a tail call to the target function. */ 8524 if (! TREE_USED (function)) 8525 { 8526 assemble_external (function); 8527 TREE_USED (function) = 1; 8528 } 8529 funexp = XEXP (DECL_RTL (function), 0); 8530 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 8531 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); 8532 SIBLING_CALL_P (insn) = 1; 8533 8534 /* Run just enough of rest_of_compilation to get the insns emitted. 8535 There's not really enough bulk here to make other passes such as 8536 instruction scheduling worth while. Note that use_thunk calls 8537 assemble_start_function and assemble_end_function. */ 8538 insn = get_insns (); 8539 shorten_branches (insn); 8540 final_start_function (insn, file, 1); 8541 final (insn, file, 1); 8542 final_end_function (); 8543} 8544#endif /* TARGET_ABI_OSF */ 8545 8546/* Debugging support. */ 8547 8548#include "gstab.h" 8549 8550/* Name of the file containing the current function. */ 8551 8552static const char *current_function_file = ""; 8553 8554/* Offsets to alpha virtual arg/local debugging pointers. */ 8555 8556long alpha_arg_offset; 8557long alpha_auto_offset; 8558 8559/* Emit a new filename to a stream. */ 8560 8561void 8562alpha_output_filename (FILE *stream, const char *name) 8563{ 8564 static int first_time = TRUE; 8565 8566 if (first_time) 8567 { 8568 first_time = FALSE; 8569 ++num_source_filenames; 8570 current_function_file = name; 8571 fprintf (stream, "\t.file\t%d ", num_source_filenames); 8572 output_quoted_string (stream, name); 8573 fprintf (stream, "\n"); 8574 } 8575 8576 else if (name != current_function_file 8577 && strcmp (name, current_function_file) != 0) 8578 { 8579 ++num_source_filenames; 8580 current_function_file = name; 8581 fprintf (stream, "\t.file\t%d ", num_source_filenames); 8582 8583 output_quoted_string (stream, name); 8584 fprintf (stream, "\n"); 8585 } 8586} 8587 8588/* Structure to show the current status of registers and memory. 
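   Each mask holds one bit per hard register (31 integer and 31 fp bits;
   the always-zero registers $31 and $f31 are never recorded), plus a
   single bit standing for all of memory.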
*/

struct shadow_summary
{
  struct {
    unsigned int i     : 31;	/* Mask of int regs */
    unsigned int fp    : 31;	/* Mask of fp regs */
    unsigned int mem   :  1;	/* mem == imem | fpmem */
  } used, defd;
};

/* Summarize the effects of expression X on the machine.  Update SUM, a
   pointer to the summary structure.  SET is nonzero if the insn is setting
   the object, otherwise zero.  */

static void
summarize_insn (rtx x, struct shadow_summary *sum, int set)
{
  const char *format_ptr;
  int i, j;

  if (x == 0)
    return;

  switch (GET_CODE (x))
    {
      /* ??? Note that this case would be incorrect if the Alpha had a
	 ZERO_EXTRACT in SET_DEST.  */
    case SET:
      summarize_insn (SET_SRC (x), sum, 0);
      summarize_insn (SET_DEST (x), sum, 1);
      break;

    case CLOBBER:
      summarize_insn (XEXP (x, 0), sum, 1);
      break;

    case USE:
      summarize_insn (XEXP (x, 0), sum, 0);
      break;

    case ASM_OPERANDS:
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
	summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
	summarize_insn (XVECEXP (x, 0, i), sum, 0);
      break;

    case SUBREG:
      summarize_insn (SUBREG_REG (x), sum, 0);
      break;

    case REG:
      {
	int regno = REGNO (x);
	unsigned long mask = ((unsigned long) 1) << (regno % 32);

	if (regno == 31 || regno == 63)
	  break;

	if (set)
	  {
	    if (regno < 32)
	      sum->defd.i |= mask;
	    else
	      sum->defd.fp |= mask;
	  }
	else
	  {
	    if (regno < 32)
	      sum->used.i |= mask;
	    else
	      sum->used.fp |= mask;
	  }
      }
      break;

    case MEM:
      if (set)
	sum->defd.mem = 1;
      else
	sum->used.mem = 1;

      /* Find the regs used in memory address computation: */
      summarize_insn (XEXP (x, 0), sum, 0);
      break;

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
    case SCRATCH:     case ASM_INPUT:
      break;

      /* Handle common unary and binary ops for efficiency.
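	 All of these only read their operands, so each operand is
	 summarized with SET == 0.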
*/ 8684 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 8685 case MOD: case UDIV: case UMOD: case AND: case IOR: 8686 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 8687 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 8688 case NE: case EQ: case GE: case GT: case LE: 8689 case LT: case GEU: case GTU: case LEU: case LTU: 8690 summarize_insn (XEXP (x, 0), sum, 0); 8691 summarize_insn (XEXP (x, 1), sum, 0); 8692 break; 8693 8694 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 8695 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 8696 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 8697 case SQRT: case FFS: 8698 summarize_insn (XEXP (x, 0), sum, 0); 8699 break; 8700 8701 default: 8702 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 8703 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 8704 switch (format_ptr[i]) 8705 { 8706 case 'e': 8707 summarize_insn (XEXP (x, i), sum, 0); 8708 break; 8709 8710 case 'E': 8711 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 8712 summarize_insn (XVECEXP (x, i, j), sum, 0); 8713 break; 8714 8715 case 'i': 8716 break; 8717 8718 default: 8719 gcc_unreachable (); 8720 } 8721 } 8722} 8723 8724/* Ensure a sufficient number of `trapb' insns are in the code when 8725 the user requests code with a trap precision of functions or 8726 instructions. 8727 8728 In naive mode, when the user requests a trap-precision of 8729 "instruction", a trapb is needed after every instruction that may 8730 generate a trap. This ensures that the code is resumption safe but 8731 it is also slow. 8732 8733 When optimizations are turned on, we delay issuing a trapb as long 8734 as possible. In this context, a trap shadow is the sequence of 8735 instructions that starts with a (potentially) trap generating 8736 instruction and extends to the next trapb or call_pal instruction 8737 (but GCC never generates call_pal by itself). We can delay (and 8738 therefore sometimes omit) a trapb subject to the following 8739 conditions: 8740 8741 (a) On entry to the trap shadow, if any Alpha register or memory 8742 location contains a value that is used as an operand value by some 8743 instruction in the trap shadow (live on entry), then no instruction 8744 in the trap shadow may modify the register or memory location. 8745 8746 (b) Within the trap shadow, the computation of the base register 8747 for a memory load or store instruction may not involve using the 8748 result of an instruction that might generate an UNPREDICTABLE 8749 result. 8750 8751 (c) Within the trap shadow, no register may be used more than once 8752 as a destination register. (This is to make life easier for the 8753 trap-handler.) 8754 8755 (d) The trap shadow may not include any branch instructions. 
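   As an editorial illustration of (c): two trap-capable instructions
   in one shadow writing the same destination, e.g.

	addt $f0,$f1,$f2
	subt $f3,$f4,$f2

   must be separated by a trapb; this is the (sum.defd & shadow.defd)
   test in the walker below.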
*/ 8756 8757static void 8758alpha_handle_trap_shadows (void) 8759{ 8760 struct shadow_summary shadow; 8761 int trap_pending, exception_nesting; 8762 rtx_insn *i, *n; 8763 8764 trap_pending = 0; 8765 exception_nesting = 0; 8766 shadow.used.i = 0; 8767 shadow.used.fp = 0; 8768 shadow.used.mem = 0; 8769 shadow.defd = shadow.used; 8770 8771 for (i = get_insns (); i ; i = NEXT_INSN (i)) 8772 { 8773 if (NOTE_P (i)) 8774 { 8775 switch (NOTE_KIND (i)) 8776 { 8777 case NOTE_INSN_EH_REGION_BEG: 8778 exception_nesting++; 8779 if (trap_pending) 8780 goto close_shadow; 8781 break; 8782 8783 case NOTE_INSN_EH_REGION_END: 8784 exception_nesting--; 8785 if (trap_pending) 8786 goto close_shadow; 8787 break; 8788 8789 case NOTE_INSN_EPILOGUE_BEG: 8790 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC) 8791 goto close_shadow; 8792 break; 8793 } 8794 } 8795 else if (trap_pending) 8796 { 8797 if (alpha_tp == ALPHA_TP_FUNC) 8798 { 8799 if (JUMP_P (i) 8800 && GET_CODE (PATTERN (i)) == RETURN) 8801 goto close_shadow; 8802 } 8803 else if (alpha_tp == ALPHA_TP_INSN) 8804 { 8805 if (optimize > 0) 8806 { 8807 struct shadow_summary sum; 8808 8809 sum.used.i = 0; 8810 sum.used.fp = 0; 8811 sum.used.mem = 0; 8812 sum.defd = sum.used; 8813 8814 switch (GET_CODE (i)) 8815 { 8816 case INSN: 8817 /* Annoyingly, get_attr_trap will die on these. */ 8818 if (GET_CODE (PATTERN (i)) == USE 8819 || GET_CODE (PATTERN (i)) == CLOBBER) 8820 break; 8821 8822 summarize_insn (PATTERN (i), &sum, 0); 8823 8824 if ((sum.defd.i & shadow.defd.i) 8825 || (sum.defd.fp & shadow.defd.fp)) 8826 { 8827 /* (c) would be violated */ 8828 goto close_shadow; 8829 } 8830 8831 /* Combine shadow with summary of current insn: */ 8832 shadow.used.i |= sum.used.i; 8833 shadow.used.fp |= sum.used.fp; 8834 shadow.used.mem |= sum.used.mem; 8835 shadow.defd.i |= sum.defd.i; 8836 shadow.defd.fp |= sum.defd.fp; 8837 shadow.defd.mem |= sum.defd.mem; 8838 8839 if ((sum.defd.i & shadow.used.i) 8840 || (sum.defd.fp & shadow.used.fp) 8841 || (sum.defd.mem & shadow.used.mem)) 8842 { 8843 /* (a) would be violated (also takes care of (b)) */ 8844 gcc_assert (get_attr_trap (i) != TRAP_YES 8845 || (!(sum.defd.i & sum.used.i) 8846 && !(sum.defd.fp & sum.used.fp))); 8847 8848 goto close_shadow; 8849 } 8850 break; 8851 8852 case BARRIER: 8853 /* __builtin_unreachable can expand to no code at all, 8854 leaving (barrier) RTXes in the instruction stream. */ 8855 goto close_shadow_notrapb; 8856 8857 case JUMP_INSN: 8858 case CALL_INSN: 8859 case CODE_LABEL: 8860 goto close_shadow; 8861 8862 default: 8863 gcc_unreachable (); 8864 } 8865 } 8866 else 8867 { 8868 close_shadow: 8869 n = emit_insn_before (gen_trapb (), i); 8870 PUT_MODE (n, TImode); 8871 PUT_MODE (i, TImode); 8872 close_shadow_notrapb: 8873 trap_pending = 0; 8874 shadow.used.i = 0; 8875 shadow.used.fp = 0; 8876 shadow.used.mem = 0; 8877 shadow.defd = shadow.used; 8878 } 8879 } 8880 } 8881 8882 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC) 8883 && NONJUMP_INSN_P (i) 8884 && GET_CODE (PATTERN (i)) != USE 8885 && GET_CODE (PATTERN (i)) != CLOBBER 8886 && get_attr_trap (i) == TRAP_YES) 8887 { 8888 if (optimize && !trap_pending) 8889 summarize_insn (PATTERN (i), &shadow, 0); 8890 trap_pending = 1; 8891 } 8892 } 8893} 8894 8895/* Alpha can only issue instruction groups simultaneously if they are 8896 suitably aligned. This is very processor-specific. */ 8897/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe 8898 that are marked "fake". 
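(On EV4, for instance: TYPE_MVI, TYPE_FSQRT, TYPE_FTOI and TYPE_ITOF.)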
These instructions do not exist on that target, 8899 but it is possible to see these insns with deranged combinations of 8900 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting, 8901 choose a result at random. */ 8902 8903enum alphaev4_pipe { 8904 EV4_STOP = 0, 8905 EV4_IB0 = 1, 8906 EV4_IB1 = 2, 8907 EV4_IBX = 4 8908}; 8909 8910enum alphaev5_pipe { 8911 EV5_STOP = 0, 8912 EV5_NONE = 1, 8913 EV5_E01 = 2, 8914 EV5_E0 = 4, 8915 EV5_E1 = 8, 8916 EV5_FAM = 16, 8917 EV5_FA = 32, 8918 EV5_FM = 64 8919}; 8920 8921static enum alphaev4_pipe 8922alphaev4_insn_pipe (rtx_insn *insn) 8923{ 8924 if (recog_memoized (insn) < 0) 8925 return EV4_STOP; 8926 if (get_attr_length (insn) != 4) 8927 return EV4_STOP; 8928 8929 switch (get_attr_type (insn)) 8930 { 8931 case TYPE_ILD: 8932 case TYPE_LDSYM: 8933 case TYPE_FLD: 8934 case TYPE_LD_L: 8935 return EV4_IBX; 8936 8937 case TYPE_IADD: 8938 case TYPE_ILOG: 8939 case TYPE_ICMOV: 8940 case TYPE_ICMP: 8941 case TYPE_FST: 8942 case TYPE_SHIFT: 8943 case TYPE_IMUL: 8944 case TYPE_FBR: 8945 case TYPE_MVI: /* fake */ 8946 return EV4_IB0; 8947 8948 case TYPE_IST: 8949 case TYPE_MISC: 8950 case TYPE_IBR: 8951 case TYPE_JSR: 8952 case TYPE_CALLPAL: 8953 case TYPE_FCPYS: 8954 case TYPE_FCMOV: 8955 case TYPE_FADD: 8956 case TYPE_FDIV: 8957 case TYPE_FMUL: 8958 case TYPE_ST_C: 8959 case TYPE_MB: 8960 case TYPE_FSQRT: /* fake */ 8961 case TYPE_FTOI: /* fake */ 8962 case TYPE_ITOF: /* fake */ 8963 return EV4_IB1; 8964 8965 default: 8966 gcc_unreachable (); 8967 } 8968} 8969 8970static enum alphaev5_pipe 8971alphaev5_insn_pipe (rtx_insn *insn) 8972{ 8973 if (recog_memoized (insn) < 0) 8974 return EV5_STOP; 8975 if (get_attr_length (insn) != 4) 8976 return EV5_STOP; 8977 8978 switch (get_attr_type (insn)) 8979 { 8980 case TYPE_ILD: 8981 case TYPE_FLD: 8982 case TYPE_LDSYM: 8983 case TYPE_IADD: 8984 case TYPE_ILOG: 8985 case TYPE_ICMOV: 8986 case TYPE_ICMP: 8987 return EV5_E01; 8988 8989 case TYPE_IST: 8990 case TYPE_FST: 8991 case TYPE_SHIFT: 8992 case TYPE_IMUL: 8993 case TYPE_MISC: 8994 case TYPE_MVI: 8995 case TYPE_LD_L: 8996 case TYPE_ST_C: 8997 case TYPE_MB: 8998 case TYPE_FTOI: /* fake */ 8999 case TYPE_ITOF: /* fake */ 9000 return EV5_E0; 9001 9002 case TYPE_IBR: 9003 case TYPE_JSR: 9004 case TYPE_CALLPAL: 9005 return EV5_E1; 9006 9007 case TYPE_FCPYS: 9008 return EV5_FAM; 9009 9010 case TYPE_FBR: 9011 case TYPE_FCMOV: 9012 case TYPE_FADD: 9013 case TYPE_FDIV: 9014 case TYPE_FSQRT: /* fake */ 9015 return EV5_FA; 9016 9017 case TYPE_FMUL: 9018 return EV5_FM; 9019 9020 default: 9021 gcc_unreachable (); 9022 } 9023} 9024 9025/* IN_USE is a mask of the slots currently filled within the insn group. 9026 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then 9027 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1. 9028 9029 LEN is, of course, the length of the group in bytes. */ 9030 9031static rtx_insn * 9032alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen) 9033{ 9034 int len, in_use; 9035 9036 len = in_use = 0; 9037 9038 if (! INSN_P (insn) 9039 || GET_CODE (PATTERN (insn)) == CLOBBER 9040 || GET_CODE (PATTERN (insn)) == USE) 9041 goto next_and_done; 9042 9043 while (1) 9044 { 9045 enum alphaev4_pipe pipe; 9046 9047 pipe = alphaev4_insn_pipe (insn); 9048 switch (pipe) 9049 { 9050 case EV4_STOP: 9051 /* Force complex instructions to start new groups. */ 9052 if (in_use) 9053 goto done; 9054 9055 /* If this is a completely unrecognized insn, it's an asm. 
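   (recog_memoized has just returned a negative insn code for it.)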
9056 We don't know how long it is, so record length as -1 to 9057 signal a needed realignment. */ 9058 if (recog_memoized (insn) < 0) 9059 len = -1; 9060 else 9061 len = get_attr_length (insn); 9062 goto next_and_done; 9063 9064 case EV4_IBX: 9065 if (in_use & EV4_IB0) 9066 { 9067 if (in_use & EV4_IB1) 9068 goto done; 9069 in_use |= EV4_IB1; 9070 } 9071 else 9072 in_use |= EV4_IB0 | EV4_IBX; 9073 break; 9074 9075 case EV4_IB0: 9076 if (in_use & EV4_IB0) 9077 { 9078 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1)) 9079 goto done; 9080 in_use |= EV4_IB1; 9081 } 9082 in_use |= EV4_IB0; 9083 break; 9084 9085 case EV4_IB1: 9086 if (in_use & EV4_IB1) 9087 goto done; 9088 in_use |= EV4_IB1; 9089 break; 9090 9091 default: 9092 gcc_unreachable (); 9093 } 9094 len += 4; 9095 9096 /* Haifa doesn't do well scheduling branches. */ 9097 if (JUMP_P (insn)) 9098 goto next_and_done; 9099 9100 next: 9101 insn = next_nonnote_insn (insn); 9102 9103 if (!insn || ! INSN_P (insn)) 9104 goto done; 9105 9106 /* Let Haifa tell us where it thinks insn group boundaries are. */ 9107 if (GET_MODE (insn) == TImode) 9108 goto done; 9109 9110 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) 9111 goto next; 9112 } 9113 9114 next_and_done: 9115 insn = next_nonnote_insn (insn); 9116 9117 done: 9118 *plen = len; 9119 *pin_use = in_use; 9120 return insn; 9121} 9122 9123/* IN_USE is a mask of the slots currently filled within the insn group. 9124 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then 9125 the insn in EV5_E0 can be swapped by the hardware into EV5_E1. 9126 9127 LEN is, of course, the length of the group in bytes. */ 9128 9129static rtx_insn * 9130alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen) 9131{ 9132 int len, in_use; 9133 9134 len = in_use = 0; 9135 9136 if (! INSN_P (insn) 9137 || GET_CODE (PATTERN (insn)) == CLOBBER 9138 || GET_CODE (PATTERN (insn)) == USE) 9139 goto next_and_done; 9140 9141 while (1) 9142 { 9143 enum alphaev5_pipe pipe; 9144 9145 pipe = alphaev5_insn_pipe (insn); 9146 switch (pipe) 9147 { 9148 case EV5_STOP: 9149 /* Force complex instructions to start new groups. */ 9150 if (in_use) 9151 goto done; 9152 9153 /* If this is a completely unrecognized insn, it's an asm. 9154 We don't know how long it is, so record length as -1 to 9155 signal a needed realignment. */ 9156 if (recog_memoized (insn) < 0) 9157 len = -1; 9158 else 9159 len = get_attr_length (insn); 9160 goto next_and_done; 9161 9162 /* ??? Most of the places below, we would like to assert never 9163 happen, as it would indicate an error either in Haifa, or 9164 in the scheduling description. Unfortunately, Haifa never 9165 schedules the last instruction of the BB, so we don't have 9166 an accurate TI bit to go off. 
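   (Recall from above that an EV5_E01 insn is slotted into E0 first
   while recording its flexibility, so the EV5_E0 case can later push
   it over to E1 instead of ending the group.)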
*/ 9167 case EV5_E01: 9168 if (in_use & EV5_E0) 9169 { 9170 if (in_use & EV5_E1) 9171 goto done; 9172 in_use |= EV5_E1; 9173 } 9174 else 9175 in_use |= EV5_E0 | EV5_E01; 9176 break; 9177 9178 case EV5_E0: 9179 if (in_use & EV5_E0) 9180 { 9181 if (!(in_use & EV5_E01) || (in_use & EV5_E1)) 9182 goto done; 9183 in_use |= EV5_E1; 9184 } 9185 in_use |= EV5_E0; 9186 break; 9187 9188 case EV5_E1: 9189 if (in_use & EV5_E1) 9190 goto done; 9191 in_use |= EV5_E1; 9192 break; 9193 9194 case EV5_FAM: 9195 if (in_use & EV5_FA) 9196 { 9197 if (in_use & EV5_FM) 9198 goto done; 9199 in_use |= EV5_FM; 9200 } 9201 else 9202 in_use |= EV5_FA | EV5_FAM; 9203 break; 9204 9205 case EV5_FA: 9206 if (in_use & EV5_FA) 9207 goto done; 9208 in_use |= EV5_FA; 9209 break; 9210 9211 case EV5_FM: 9212 if (in_use & EV5_FM) 9213 goto done; 9214 in_use |= EV5_FM; 9215 break; 9216 9217 case EV5_NONE: 9218 break; 9219 9220 default: 9221 gcc_unreachable (); 9222 } 9223 len += 4; 9224 9225 /* Haifa doesn't do well scheduling branches. */ 9226 /* ??? If this is predicted not-taken, slotting continues, except 9227 that no more IBR, FBR, or JSR insns may be slotted. */ 9228 if (JUMP_P (insn)) 9229 goto next_and_done; 9230 9231 next: 9232 insn = next_nonnote_insn (insn); 9233 9234 if (!insn || ! INSN_P (insn)) 9235 goto done; 9236 9237 /* Let Haifa tell us where it thinks insn group boundaries are. */ 9238 if (GET_MODE (insn) == TImode) 9239 goto done; 9240 9241 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) 9242 goto next; 9243 } 9244 9245 next_and_done: 9246 insn = next_nonnote_insn (insn); 9247 9248 done: 9249 *plen = len; 9250 *pin_use = in_use; 9251 return insn; 9252} 9253 9254static rtx 9255alphaev4_next_nop (int *pin_use) 9256{ 9257 int in_use = *pin_use; 9258 rtx nop; 9259 9260 if (!(in_use & EV4_IB0)) 9261 { 9262 in_use |= EV4_IB0; 9263 nop = gen_nop (); 9264 } 9265 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX) 9266 { 9267 in_use |= EV4_IB1; 9268 nop = gen_nop (); 9269 } 9270 else if (TARGET_FP && !(in_use & EV4_IB1)) 9271 { 9272 in_use |= EV4_IB1; 9273 nop = gen_fnop (); 9274 } 9275 else 9276 nop = gen_unop (); 9277 9278 *pin_use = in_use; 9279 return nop; 9280} 9281 9282static rtx 9283alphaev5_next_nop (int *pin_use) 9284{ 9285 int in_use = *pin_use; 9286 rtx nop; 9287 9288 if (!(in_use & EV5_E1)) 9289 { 9290 in_use |= EV5_E1; 9291 nop = gen_nop (); 9292 } 9293 else if (TARGET_FP && !(in_use & EV5_FA)) 9294 { 9295 in_use |= EV5_FA; 9296 nop = gen_fnop (); 9297 } 9298 else if (TARGET_FP && !(in_use & EV5_FM)) 9299 { 9300 in_use |= EV5_FM; 9301 nop = gen_fnop (); 9302 } 9303 else 9304 nop = gen_unop (); 9305 9306 *pin_use = in_use; 9307 return nop; 9308} 9309 9310/* The instruction group alignment main loop. */ 9311 9312static void 9313alpha_align_insns_1 (unsigned int max_align, 9314 rtx_insn *(*next_group) (rtx_insn *, int *, int *), 9315 rtx (*next_nop) (int *)) 9316{ 9317 /* ALIGN is the known alignment for the insn group. */ 9318 unsigned int align; 9319 /* OFS is the offset of the current insn in the insn group. */ 9320 int ofs; 9321 int prev_in_use, in_use, len, ldgp; 9322 rtx_insn *i, *next; 9323 9324 /* Let shorten branches care for assigning alignments to code labels. 
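   After that, the loop below tracks the offset of each recognized insn
   group within the current alignment unit and, where a group would
   straddle a boundary, either re-aligns or pads with nops chosen to fit
   the previous group's empty issue slots.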
*/
  shorten_branches (get_insns ());

  if (align_functions < 4)
    align = 4;
  else if ((unsigned int) align_functions < max_align)
    align = align_functions;
  else
    align = max_align;

  ofs = prev_in_use = 0;
  i = get_insns ();
  if (NOTE_P (i))
    i = next_nonnote_insn (i);

  ldgp = alpha_function_needs_gp ? 8 : 0;

  while (i)
    {
      next = (*next_group) (i, &in_use, &len);

      /* When we see a label, resync alignment etc.  */
      if (LABEL_P (i))
	{
	  unsigned int new_align = 1 << label_to_alignment (i);

	  if (new_align >= align)
	    {
	      align = new_align < max_align ? new_align : max_align;
	      ofs = 0;
	    }

	  else if (ofs & (new_align-1))
	    ofs = (ofs | (new_align-1)) + 1;
	  gcc_assert (!len);
	}

      /* Handle complex instructions specially.  */
      else if (in_use == 0)
	{
	  /* Asms will have length < 0.  This is a signal that we have
	     lost alignment knowledge.  Assume, however, that the asm
	     will not mis-align instructions.  */
	  if (len < 0)
	    {
	      ofs = 0;
	      align = 4;
	      len = 0;
	    }
	}

      /* If the known alignment is smaller than the recognized insn group,
	 realign the output.  */
      else if ((int) align < len)
	{
	  unsigned int new_log_align = len > 8 ? 4 : 3;
	  rtx_insn *prev, *where;

	  where = prev = prev_nonnote_insn (i);
	  if (!where || !LABEL_P (where))
	    where = i;

	  /* Can't realign between a call and its gp reload.  */
	  if (! (TARGET_EXPLICIT_RELOCS
		 && prev && CALL_P (prev)))
	    {
	      emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
	      align = 1 << new_log_align;
	      ofs = 0;
	    }
	}

      /* We may not insert padding inside the initial ldgp sequence.  */
      else if (ldgp > 0)
	ldgp -= len;

      /* If the group won't fit in the same INT16 as the previous,
	 we need to add padding to keep the group together.  Rather
	 than simply leaving the insn filling to the assembler, we
	 can make use of the knowledge of what sorts of instructions
	 were issued in the previous group to make sure that all of
	 the added nops are really free.  */
      else if (ofs + len > (int) align)
	{
	  int nop_count = (align - ofs) / 4;
	  rtx_insn *where;

	  /* Insert nops before labels, branches, and calls to truly merge
	     the execution of the nops with the previous instruction group.  */
	  where = prev_nonnote_insn (i);
	  if (where)
	    {
	      if (LABEL_P (where))
		{
		  rtx_insn *where2 = prev_nonnote_insn (where);
		  if (where2 && JUMP_P (where2))
		    where = where2;
		}
	      else if (NONJUMP_INSN_P (where))
		where = i;
	    }
	  else
	    where = i;

	  do
	    emit_insn_before ((*next_nop)(&prev_in_use), where);
	  while (--nop_count);
	  ofs = 0;
	}

      ofs = (ofs + len) & (align - 1);
      prev_in_use = in_use;
      i = next;
    }
}

static void
alpha_align_insns (void)
{
  if (alpha_tune == PROCESSOR_EV4)
    alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
  else if (alpha_tune == PROCESSOR_EV5)
    alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
  else
    gcc_unreachable ();
}

/* Insert an unop between sibcall or noreturn function call and GP load.
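   This is the linker-relaxation workaround applied from alpha_reorg
   below; see the disassembly excerpts there for the failure mode it
   avoids.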
*/

static void
alpha_pad_function_end (void)
{
  rtx_insn *insn, *next;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!CALL_P (insn)
	  || !(SIBLING_CALL_P (insn)
	       || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
        continue;

      /* Make sure we do not split a call and its corresponding
	 CALL_ARG_LOCATION note.  */
      next = NEXT_INSN (insn);
      if (next == NULL)
        continue;
      if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
        insn = next;

      next = next_active_insn (insn);
      if (next)
	{
	  rtx pat = PATTERN (next);

	  if (GET_CODE (pat) == SET
	      && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
	      && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
	    emit_insn_after (gen_unop (), insn);
	}
    }
}

/* Machine dependent reorg pass.  */

static void
alpha_reorg (void)
{
  /* Workaround for a linker error that triggers when an exception
     handler immediately follows a sibcall or a noreturn function.

     In the sibcall case:

	The instruction stream from an object file:

	 1d8:   00 00 fb 6b     jmp     (t12)
	 1dc:   00 00 ba 27     ldah    gp,0(ra)
	 1e0:   00 00 bd 23     lda     gp,0(gp)
	 1e4:   00 00 7d a7     ldq     t12,0(gp)
	 1e8:   00 40 5b 6b     jsr     ra,(t12),1ec <__funcZ+0x1ec>

     was converted in the final link pass to:

	12003aa88:      67 fa ff c3     br      120039428 <...>
	12003aa8c:      00 00 fe 2f     unop
	12003aa90:      00 00 fe 2f     unop
	12003aa94:      48 83 7d a7     ldq     t12,-31928(gp)
	12003aa98:      00 40 5b 6b     jsr     ra,(t12),12003aa9c <__func+0x1ec>

     And in the noreturn case:

	The instruction stream from an object file:

	  54:   00 40 5b 6b     jsr     ra,(t12),58 <__func+0x58>
	  58:   00 00 ba 27     ldah    gp,0(ra)
	  5c:   00 00 bd 23     lda     gp,0(gp)
	  60:   00 00 7d a7     ldq     t12,0(gp)
	  64:   00 40 5b 6b     jsr     ra,(t12),68 <__func+0x68>

     was converted in the final link pass to:

	fdb24:       a0 03 40 d3     bsr     ra,fe9a8 <_called_func+0x8>
	fdb28:       00 00 fe 2f     unop
	fdb2c:       00 00 fe 2f     unop
	fdb30:       30 82 7d a7     ldq     t12,-32208(gp)
	fdb34:       00 40 5b 6b     jsr     ra,(t12),fdb38 <__func+0x68>

     GP load instructions were wrongly cleared by the linker relaxation
     pass.  This workaround prevents removal of GP loads by inserting
     an unop instruction between a sibcall or noreturn function call and
     the exception handler prologue.  */

  if (current_function_has_exception_handlers ())
    alpha_pad_function_end ();
}

static void
alpha_file_start (void)
{
  default_file_start ();

  fputs ("\t.set noreorder\n", asm_out_file);
  fputs ("\t.set volatile\n", asm_out_file);
  if (TARGET_ABI_OSF)
    fputs ("\t.set noat\n", asm_out_file);
  if (TARGET_EXPLICIT_RELOCS)
    fputs ("\t.set nomacro\n", asm_out_file);
  if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
    {
      const char *arch;

      if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
	arch = "ev6";
      else if (TARGET_MAX)
	arch = "pca56";
      else if (TARGET_BWX)
	arch = "ev56";
      else if (alpha_cpu == PROCESSOR_EV5)
	arch = "ev5";
      else
	arch = "ev4";

      fprintf (asm_out_file, "\t.arch %s\n", arch);
    }
}

/* Since we don't have a .dynbss section, we should not allow global
   relocations in the .rodata section.
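   (Editorial note: the return value is a mask which, as we understand
   it, requests writable placement for globally-relocated data always
   (bit 2) and additionally for locally-relocated data when generating
   PIC (bit 1), hence flag_pic ? 3 : 2.)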
*/ 9571 9572static int 9573alpha_elf_reloc_rw_mask (void) 9574{ 9575 return flag_pic ? 3 : 2; 9576} 9577 9578/* Return a section for X. The only special thing we do here is to 9579 honor small data. */ 9580 9581static section * 9582alpha_elf_select_rtx_section (machine_mode mode, rtx x, 9583 unsigned HOST_WIDE_INT align) 9584{ 9585 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) 9586 /* ??? Consider using mergeable sdata sections. */ 9587 return sdata_section; 9588 else 9589 return default_elf_select_rtx_section (mode, x, align); 9590} 9591 9592static unsigned int 9593alpha_elf_section_type_flags (tree decl, const char *name, int reloc) 9594{ 9595 unsigned int flags = 0; 9596 9597 if (strcmp (name, ".sdata") == 0 9598 || strncmp (name, ".sdata.", 7) == 0 9599 || strncmp (name, ".gnu.linkonce.s.", 16) == 0 9600 || strcmp (name, ".sbss") == 0 9601 || strncmp (name, ".sbss.", 6) == 0 9602 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) 9603 flags = SECTION_SMALL; 9604 9605 flags |= default_section_type_flags (decl, name, reloc); 9606 return flags; 9607} 9608 9609/* Structure to collect function names for final output in link section. */ 9610/* Note that items marked with GTY can't be ifdef'ed out. */ 9611 9612enum reloc_kind 9613{ 9614 KIND_LINKAGE, 9615 KIND_CODEADDR 9616}; 9617 9618struct GTY(()) alpha_links 9619{ 9620 rtx func; 9621 rtx linkage; 9622 enum reloc_kind rkind; 9623}; 9624 9625#if TARGET_ABI_OPEN_VMS 9626 9627/* Return the VMS argument type corresponding to MODE. */ 9628 9629enum avms_arg_type 9630alpha_arg_type (machine_mode mode) 9631{ 9632 switch (mode) 9633 { 9634 case SFmode: 9635 return TARGET_FLOAT_VAX ? FF : FS; 9636 case DFmode: 9637 return TARGET_FLOAT_VAX ? FD : FT; 9638 default: 9639 return I64; 9640 } 9641} 9642 9643/* Return an rtx for an integer representing the VMS Argument Information 9644 register value. */ 9645 9646rtx 9647alpha_arg_info_reg_val (CUMULATIVE_ARGS cum) 9648{ 9649 unsigned HOST_WIDE_INT regval = cum.num_args; 9650 int i; 9651 9652 for (i = 0; i < 6; i++) 9653 regval |= ((int) cum.atypes[i]) << (i * 3 + 8); 9654 9655 return GEN_INT (regval); 9656} 9657 9658 9659/* Return a SYMBOL_REF representing the reference to the .linkage entry 9660 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if 9661 this is the reference to the linkage pointer value, 0 if this is the 9662 reference to the function entry value. RFLAG is 1 if this a reduced 9663 reference (code address only), 0 if this is a full reference. */ 9664 9665rtx 9666alpha_use_linkage (rtx func, bool lflag, bool rflag) 9667{ 9668 struct alpha_links *al = NULL; 9669 const char *name = XSTR (func, 0); 9670 9671 if (cfun->machine->links) 9672 { 9673 /* Is this name already defined? */ 9674 alpha_links **slot = cfun->machine->links->get (name); 9675 if (slot) 9676 al = *slot; 9677 } 9678 else 9679 cfun->machine->links 9680 = hash_map<const char *, alpha_links *, string_traits>::create_ggc (64); 9681 9682 if (al == NULL) 9683 { 9684 size_t buf_len; 9685 char *linksym; 9686 tree id; 9687 9688 if (name[0] == '*') 9689 name++; 9690 9691 /* Follow transparent alias, as this is used for CRTL translations. 
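     The chain is walked via TREE_CHAIN until a non-alias identifier
     remains, and the linkage symbol is built from that final name.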
*/ 9692 id = maybe_get_identifier (name); 9693 if (id) 9694 { 9695 while (IDENTIFIER_TRANSPARENT_ALIAS (id)) 9696 id = TREE_CHAIN (id); 9697 name = IDENTIFIER_POINTER (id); 9698 } 9699 9700 buf_len = strlen (name) + 8 + 9; 9701 linksym = (char *) alloca (buf_len); 9702 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name); 9703 9704 al = ggc_alloc<alpha_links> (); 9705 al->func = func; 9706 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym)); 9707 9708 cfun->machine->links->put (ggc_strdup (name), al); 9709 } 9710 9711 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE; 9712 9713 if (lflag) 9714 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8)); 9715 else 9716 return al->linkage; 9717} 9718 9719static int 9720alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream) 9721{ 9722 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0)); 9723 if (link->rkind == KIND_CODEADDR) 9724 { 9725 /* External and used, request code address. */ 9726 fprintf (stream, "\t.code_address "); 9727 } 9728 else 9729 { 9730 if (!SYMBOL_REF_EXTERNAL_P (link->func) 9731 && SYMBOL_REF_LOCAL_P (link->func)) 9732 { 9733 /* Locally defined, build linkage pair. */ 9734 fprintf (stream, "\t.quad %s..en\n", name); 9735 fprintf (stream, "\t.quad "); 9736 } 9737 else 9738 { 9739 /* External, request linkage pair. */ 9740 fprintf (stream, "\t.linkage "); 9741 } 9742 } 9743 assemble_name (stream, name); 9744 fputs ("\n", stream); 9745 9746 return 0; 9747} 9748 9749static void 9750alpha_write_linkage (FILE *stream, const char *funname) 9751{ 9752 fprintf (stream, "\t.link\n"); 9753 fprintf (stream, "\t.align 3\n"); 9754 in_section = NULL; 9755 9756#ifdef TARGET_VMS_CRASH_DEBUG 9757 fputs ("\t.name ", stream); 9758 assemble_name (stream, funname); 9759 fputs ("..na\n", stream); 9760#endif 9761 9762 ASM_OUTPUT_LABEL (stream, funname); 9763 fprintf (stream, "\t.pdesc "); 9764 assemble_name (stream, funname); 9765 fprintf (stream, "..en,%s\n", 9766 alpha_procedure_type == PT_STACK ? "stack" 9767 : alpha_procedure_type == PT_REGISTER ? "reg" : "null"); 9768 9769 if (cfun->machine->links) 9770 { 9771 hash_map<const char *, alpha_links *, string_traits>::iterator iter 9772 = cfun->machine->links->begin (); 9773 for (; iter != cfun->machine->links->end (); ++iter) 9774 alpha_write_one_linkage ((*iter).first, (*iter).second, stream); 9775 } 9776} 9777 9778/* Switch to an arbitrary section NAME with attributes as specified 9779 by FLAGS. ALIGN specifies any known alignment requirements for 9780 the section; 0 if the default should be used. */ 9781 9782static void 9783vms_asm_named_section (const char *name, unsigned int flags, 9784 tree decl ATTRIBUTE_UNUSED) 9785{ 9786 fputc ('\n', asm_out_file); 9787 fprintf (asm_out_file, ".section\t%s", name); 9788 9789 if (flags & SECTION_DEBUG) 9790 fprintf (asm_out_file, ",NOWRT"); 9791 9792 fputc ('\n', asm_out_file); 9793} 9794 9795/* Record an element in the table of global constructors. SYMBOL is 9796 a SYMBOL_REF of the function to be called; PRIORITY is a number 9797 between 0 and MAX_INIT_PRIORITY. 9798 9799 Differs from default_ctors_section_asm_out_constructor in that the 9800 width of the .ctors entry is always 64 bits, rather than the 32 bits 9801 used by a normal pointer. 
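   (assemble_integer below is therefore called with UNITS_PER_WORD and
   BITS_PER_WORD, i.e. 8-byte entries aligned to 64 bits on Alpha.)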
*/ 9802 9803static void 9804vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 9805{ 9806 switch_to_section (ctors_section); 9807 assemble_align (BITS_PER_WORD); 9808 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); 9809} 9810 9811static void 9812vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 9813{ 9814 switch_to_section (dtors_section); 9815 assemble_align (BITS_PER_WORD); 9816 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); 9817} 9818#else 9819rtx 9820alpha_use_linkage (rtx func ATTRIBUTE_UNUSED, 9821 bool lflag ATTRIBUTE_UNUSED, 9822 bool rflag ATTRIBUTE_UNUSED) 9823{ 9824 return NULL_RTX; 9825} 9826 9827#endif /* TARGET_ABI_OPEN_VMS */ 9828 9829static void 9830alpha_init_libfuncs (void) 9831{ 9832 if (TARGET_ABI_OPEN_VMS) 9833 { 9834 /* Use the VMS runtime library functions for division and 9835 remainder. */ 9836 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); 9837 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); 9838 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); 9839 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); 9840 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); 9841 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); 9842 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); 9843 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); 9844 abort_libfunc = init_one_libfunc ("decc$abort"); 9845 memcmp_libfunc = init_one_libfunc ("decc$memcmp"); 9846#ifdef MEM_LIBFUNCS_INIT 9847 MEM_LIBFUNCS_INIT; 9848#endif 9849 } 9850} 9851 9852/* On the Alpha, we use this to disable the floating-point registers 9853 when they don't exist. */ 9854 9855static void 9856alpha_conditional_register_usage (void) 9857{ 9858 int i; 9859 if (! TARGET_FPREGS) 9860 for (i = 32; i < 63; i++) 9861 fixed_regs[i] = call_used_regs[i] = 1; 9862} 9863 9864/* Canonicalize a comparison from one we don't have to one we do have. */ 9865 9866static void 9867alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, 9868 bool op0_preserve_value) 9869{ 9870 if (!op0_preserve_value 9871 && (*code == GE || *code == GT || *code == GEU || *code == GTU) 9872 && (REG_P (*op1) || *op1 == const0_rtx)) 9873 { 9874 rtx tem = *op0; 9875 *op0 = *op1; 9876 *op1 = tem; 9877 *code = (int)swap_condition ((enum rtx_code)*code); 9878 } 9879 9880 if ((*code == LT || *code == LTU) 9881 && CONST_INT_P (*op1) && INTVAL (*op1) == 256) 9882 { 9883 *code = *code == LT ? LE : LEU; 9884 *op1 = GEN_INT (255); 9885 } 9886} 9887 9888/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ 9889 9890static void 9891alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 9892{ 9893 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17); 9894 9895 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; 9896 tree new_fenv_var, reload_fenv, restore_fnenv; 9897 tree update_call, atomic_feraiseexcept, hold_fnclex; 9898 9899 /* Assume OSF/1 compatible interfaces. 
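     Specifically, the __ieee_get_fp_control and __ieee_set_fp_control
     entry points declared below; on any other ABI we simply decline to
     expand the fenv hooks.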
*/ 9900 if (!TARGET_ABI_OSF) 9901 return; 9902 9903 /* Generate the equivalent of : 9904 unsigned long fenv_var; 9905 fenv_var = __ieee_get_fp_control (); 9906 9907 unsigned long masked_fenv; 9908 masked_fenv = fenv_var & mask; 9909 9910 __ieee_set_fp_control (masked_fenv); */ 9911 9912 fenv_var = create_tmp_var (long_unsigned_type_node); 9913 get_fpscr 9914 = build_fn_decl ("__ieee_get_fp_control", 9915 build_function_type_list (long_unsigned_type_node, NULL)); 9916 set_fpscr 9917 = build_fn_decl ("__ieee_set_fp_control", 9918 build_function_type_list (void_type_node, NULL)); 9919 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK); 9920 ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, 9921 fenv_var, build_call_expr (get_fpscr, 0)); 9922 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask); 9923 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); 9924 *hold = build2 (COMPOUND_EXPR, void_type_node, 9925 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), 9926 hold_fnclex); 9927 9928 /* Store the value of masked_fenv to clear the exceptions: 9929 __ieee_set_fp_control (masked_fenv); */ 9930 9931 *clear = build_call_expr (set_fpscr, 1, masked_fenv); 9932 9933 /* Generate the equivalent of : 9934 unsigned long new_fenv_var; 9935 new_fenv_var = __ieee_get_fp_control (); 9936 9937 __ieee_set_fp_control (fenv_var); 9938 9939 __atomic_feraiseexcept (new_fenv_var); */ 9940 9941 new_fenv_var = create_tmp_var (long_unsigned_type_node); 9942 reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var, 9943 build_call_expr (get_fpscr, 0)); 9944 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); 9945 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 9946 update_call 9947 = build_call_expr (atomic_feraiseexcept, 1, 9948 fold_convert (integer_type_node, new_fenv_var)); 9949 *update = build2 (COMPOUND_EXPR, void_type_node, 9950 build2 (COMPOUND_EXPR, void_type_node, 9951 reload_fenv, restore_fnenv), update_call); 9952} 9953 9954/* Initialize the GCC target structure. */ 9955#if TARGET_ABI_OPEN_VMS 9956# undef TARGET_ATTRIBUTE_TABLE 9957# define TARGET_ATTRIBUTE_TABLE vms_attribute_table 9958# undef TARGET_CAN_ELIMINATE 9959# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate 9960#endif 9961 9962#undef TARGET_IN_SMALL_DATA_P 9963#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p 9964 9965#undef TARGET_ASM_ALIGNED_HI_OP 9966#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" 9967#undef TARGET_ASM_ALIGNED_DI_OP 9968#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" 9969 9970/* Default unaligned ops are provided for ELF systems. To get unaligned 9971 data for non-ELF systems, we have to turn off auto alignment. 
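   Hence the ".align 0" prefix on each of the VMS directives below.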
*/
#if TARGET_ABI_OPEN_VMS
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK  alpha_elf_reloc_rw_mask
#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION  alpha_elf_select_rtx_section
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS  alpha_elf_section_type_flags

#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE  alpha_output_function_end_prologue

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START alpha_file_start

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  alpha_multipass_dfa_lookahead

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL  alpha_builtin_decl
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS alpha_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN alpha_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem

#if TARGET_ABI_OSF
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
#undef TARGET_STDARG_OPTIMIZE_HOOK
#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
#endif

/* Use 16-bit anchors.
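   That is, keep section anchors within the signed 16-bit displacement
   that lda/ldq can encode, giving the [-0x8000, 0x7fff] range below.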
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS alpha_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG alpha_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD alpha_secondary_reload

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start

/* The Alpha architecture does not require sequential consistency.  See
   http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
   for an example of how it can be violated in practice.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
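
/* An illustrative sketch, guarded out and never compiled, of the
   reordering described at the URL above; all names here are
   hypothetical.  Alpha may satisfy even a data-dependent load from a
   stale cache copy, so a reader needs a read barrier ("mb") between
   loading a shared pointer and dereferencing it.  */
#if 0
struct node { int datum; };
struct node *shared_ptr;	/* Published by another processor.  */

static int
reader (void)
{
  struct node *p = shared_ptr;	/* May observe the newly written pointer,  */
  return p->datum;		/* yet without an mb here may still read
				   the old contents of p->datum.  */
}
#endif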
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE alpha_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  alpha_override_options_after_change

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE alpha_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-alpha.h"