/* Subroutines used for code generation on the DEC Alpha.
   Copyright (C) 1992-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "predict.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "except.h"
#include "common/common-target.h"
#include "debug.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "tree-pass.h"
#include "context.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "tree-stdarg.h"
#include "tm-constrs.h"
#include "libfuncs.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "flags.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Specify which cpu to schedule for.  */
enum processor_type alpha_tune;

/* Which cpu we're generating code for.  */
enum processor_type alpha_cpu;

static const char * const alpha_cpu_name[] =
{
  "ev4", "ev5", "ev6"
};

/* Specify how accurate floating-point traps need to be.  */

enum alpha_trap_precision alpha_tp;

/* Specify the floating-point rounding mode.  */

enum alpha_fp_rounding_mode alpha_fprm;

/* Specify which things cause traps.  */

enum alpha_fp_trap_mode alpha_fptm;

/* Nonzero if inside of a function, because the Alpha asm can't
   handle .files inside of functions.  */

static int inside_function = FALSE;

/* The number of cycles of latency we should assume on memory reads.  */

static int alpha_memory_latency = 3;

/* Whether the function needs the GP.  */

static int alpha_function_needs_gp;

/* The assembler name of the current function.  */

static const char *alpha_fnname;

/* The next explicit relocation sequence number.  */
extern GTY(()) int alpha_next_sequence_number;
int alpha_next_sequence_number = 1;

/* The literal and gpdisp sequence numbers for this insn, as printed
   by %# and %* respectively.  */
extern GTY(()) int alpha_this_literal_sequence_number;
extern GTY(()) int alpha_this_gpdisp_sequence_number;
int alpha_this_literal_sequence_number;
int alpha_this_gpdisp_sequence_number;
/* Costs of various operations on the different architectures.  */

struct alpha_rtx_cost_data
{
  unsigned char fp_add;
  unsigned char fp_mult;
  unsigned char fp_div_sf;
  unsigned char fp_div_df;
  unsigned char int_mult_si;
  unsigned char int_mult_di;
  unsigned char int_shift;
  unsigned char int_cmov;
  unsigned short int_div;
};

static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
{
  { /* EV4 */
    COSTS_N_INSNS (6),          /* fp_add */
    COSTS_N_INSNS (6),          /* fp_mult */
    COSTS_N_INSNS (34),         /* fp_div_sf */
    COSTS_N_INSNS (63),         /* fp_div_df */
    COSTS_N_INSNS (23),         /* int_mult_si */
    COSTS_N_INSNS (23),         /* int_mult_di */
    COSTS_N_INSNS (2),          /* int_shift */
    COSTS_N_INSNS (2),          /* int_cmov */
    COSTS_N_INSNS (97),         /* int_div */
  },
  { /* EV5 */
    COSTS_N_INSNS (4),          /* fp_add */
    COSTS_N_INSNS (4),          /* fp_mult */
    COSTS_N_INSNS (15),         /* fp_div_sf */
    COSTS_N_INSNS (22),         /* fp_div_df */
    COSTS_N_INSNS (8),          /* int_mult_si */
    COSTS_N_INSNS (12),         /* int_mult_di */
    COSTS_N_INSNS (1) + 1,      /* int_shift */
    COSTS_N_INSNS (1),          /* int_cmov */
    COSTS_N_INSNS (83),         /* int_div */
  },
  { /* EV6 */
    COSTS_N_INSNS (4),          /* fp_add */
    COSTS_N_INSNS (4),          /* fp_mult */
    COSTS_N_INSNS (12),         /* fp_div_sf */
    COSTS_N_INSNS (15),         /* fp_div_df */
    COSTS_N_INSNS (7),          /* int_mult_si */
    COSTS_N_INSNS (7),          /* int_mult_di */
    COSTS_N_INSNS (1),          /* int_shift */
    COSTS_N_INSNS (2),          /* int_cmov */
    COSTS_N_INSNS (86),         /* int_div */
  },
};

/* Similar but tuned for code size instead of execution latency.  The
   extra +N is fractional cost tuning based on latency.  It's used to
   encourage use of cheaper insns like shift, but only if there's just
   one of them.  */

static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
{
  COSTS_N_INSNS (1),            /* fp_add */
  COSTS_N_INSNS (1),            /* fp_mult */
  COSTS_N_INSNS (1),            /* fp_div_sf */
  COSTS_N_INSNS (1) + 1,        /* fp_div_df */
  COSTS_N_INSNS (1) + 1,        /* int_mult_si */
  COSTS_N_INSNS (1) + 2,        /* int_mult_di */
  COSTS_N_INSNS (1),            /* int_shift */
  COSTS_N_INSNS (1),            /* int_cmov */
  COSTS_N_INSNS (6),            /* int_div */
};
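/* An illustrative note, not part of the build: COSTS_N_INSNS (n) is
   n * 4 (see rtl.h), so the fractional "+ 1" and "+ 2" entries above
   land between whole-insn costs.  For example, COSTS_N_INSNS (1) + 1
   == 5 rates an EV5 shift slightly worse than one generic insn (4)
   but far better than two (8), which is how a single shift gets
   encouraged without inviting long chains of them.  */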
/* Get the number of args of a function in one of two ways.  */
#if TARGET_ABI_OPEN_VMS
#define NUM_ARGS crtl->args.info.num_args
#else
#define NUM_ARGS crtl->args.info
#endif

#define REG_PV 27
#define REG_RA 26

/* Declarations of static functions.  */
static struct machine_function *alpha_init_machine_status (void);
static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
static void alpha_handle_trap_shadows (void);
static void alpha_align_insns (void);
static void alpha_override_options_after_change (void);

#if TARGET_ABI_OPEN_VMS
static void alpha_write_linkage (FILE *, const char *);
static bool vms_valid_pointer_mode (scalar_int_mode);
#else
#define vms_patch_builtins()  gcc_unreachable()
#endif

static unsigned int
rest_of_handle_trap_shadows (void)
{
  alpha_handle_trap_shadows ();
  return 0;
}

namespace {

const pass_data pass_data_handle_trap_shadows =
{
  RTL_PASS,
  "trap_shadows",               /* name */
  OPTGROUP_NONE,                /* optinfo_flags */
  TV_NONE,                      /* tv_id */
  0,                            /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_df_finish,               /* todo_flags_finish */
};

class pass_handle_trap_shadows : public rtl_opt_pass
{
public:
  pass_handle_trap_shadows(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_trap_shadows ();
    }

}; // class pass_handle_trap_shadows

} // anon namespace

rtl_opt_pass *
make_pass_handle_trap_shadows (gcc::context *ctxt)
{
  return new pass_handle_trap_shadows (ctxt);
}

static unsigned int
rest_of_align_insns (void)
{
  alpha_align_insns ();
  return 0;
}

namespace {

const pass_data pass_data_align_insns =
{
  RTL_PASS,
  "align_insns",                /* name */
  OPTGROUP_NONE,                /* optinfo_flags */
  TV_NONE,                      /* tv_id */
  0,                            /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_df_finish,               /* todo_flags_finish */
};

class pass_align_insns : public rtl_opt_pass
{
public:
  pass_align_insns(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_align_insns, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* Due to the number of extra trapb insns, don't bother fixing up
         alignment when trap precision is instruction.  Moreover, we can
         only do our job when sched2 is run.  */
      return ((alpha_tune == PROCESSOR_EV4
               || alpha_tune == PROCESSOR_EV5)
              && optimize && !optimize_size
              && alpha_tp != ALPHA_TP_INSN
              && flag_schedule_insns_after_reload);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_align_insns ();
    }

}; // class pass_align_insns

} // anon namespace

rtl_opt_pass *
make_pass_align_insns (gcc::context *ctxt)
{
  return new pass_align_insns (ctxt);
}

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
alpha_mangle_type (const_tree type)
{
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif
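/* For illustration only (this relies on the Itanium C++ ABI encoding,
   not on anything in this file): with -mlong-double-128 in effect,
   "void f (long double)" mangles as _Z1fg, using 'g' for the 128-bit
   type, where the default 64-bit long double would give _Z1fe.  */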
/* Parse target option strings.  */

static void
alpha_option_override (void)
{
  static const struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int flags;
    const unsigned short line_size; /* in bytes */
    const unsigned short l1_size;   /* in kb.  */
    const unsigned short l2_size;   /* in kb.  */
  } cpu_table[] = {
    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
       had 64k to 8M 8-byte direct Bcache.  */
    { "ev4",    PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "21064",  PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "ev45",   PROCESSOR_EV4, 0, 32, 16, 16*1024 },

    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
       and 1M to 16M 64 byte L3 (not modeled).
       PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
       PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache.  */
    { "ev5",    PROCESSOR_EV5, 0, 32, 8, 96 },
    { "21164",  PROCESSOR_EV5, 0, 32, 8, 96 },
    { "ev56",   PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "pca56",  PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },

    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
    { "ev6",    PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "21264",  PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "ev67",   PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 },
    { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 }
  };

  int const ct_size = ARRAY_SIZE (cpu_table);
  int line_size = 0, l1_size = 0, l2_size = 0;
  int i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Default to full IEEE compliance mode for Go language.  */
  if (strcmp (lang_hooks.name, "GNU Go") == 0
      && !(target_flags_explicit & MASK_IEEE))
    target_flags |= MASK_IEEE;

  alpha_fprm = ALPHA_FPRM_NORM;
  alpha_tp = ALPHA_TP_PROG;
  alpha_fptm = ALPHA_FPTM_N;

  if (TARGET_IEEE)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SU;
    }
  if (TARGET_IEEE_WITH_INEXACT)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SUI;
    }

  if (alpha_tp_string)
    {
      if (! strcmp (alpha_tp_string, "p"))
        alpha_tp = ALPHA_TP_PROG;
      else if (! strcmp (alpha_tp_string, "f"))
        alpha_tp = ALPHA_TP_FUNC;
      else if (! strcmp (alpha_tp_string, "i"))
        alpha_tp = ALPHA_TP_INSN;
      else
        error ("bad value %qs for %<-mtrap-precision%> switch",
               alpha_tp_string);
    }

  if (alpha_fprm_string)
    {
      if (! strcmp (alpha_fprm_string, "n"))
        alpha_fprm = ALPHA_FPRM_NORM;
      else if (! strcmp (alpha_fprm_string, "m"))
        alpha_fprm = ALPHA_FPRM_MINF;
      else if (! strcmp (alpha_fprm_string, "c"))
        alpha_fprm = ALPHA_FPRM_CHOP;
      else if (! strcmp (alpha_fprm_string, "d"))
        alpha_fprm = ALPHA_FPRM_DYN;
      else
        error ("bad value %qs for %<-mfp-rounding-mode%> switch",
               alpha_fprm_string);
    }

  if (alpha_fptm_string)
    {
      if (strcmp (alpha_fptm_string, "n") == 0)
        alpha_fptm = ALPHA_FPTM_N;
      else if (strcmp (alpha_fptm_string, "u") == 0)
        alpha_fptm = ALPHA_FPTM_U;
      else if (strcmp (alpha_fptm_string, "su") == 0)
        alpha_fptm = ALPHA_FPTM_SU;
      else if (strcmp (alpha_fptm_string, "sui") == 0)
        alpha_fptm = ALPHA_FPTM_SUI;
      else
        error ("bad value %qs for %<-mfp-trap-mode%> switch",
               alpha_fptm_string);
    }
  if (alpha_cpu_string)
    {
      for (i = 0; i < ct_size; i++)
        if (! strcmp (alpha_cpu_string, cpu_table[i].name))
          {
            alpha_tune = alpha_cpu = cpu_table[i].processor;
            line_size = cpu_table[i].line_size;
            l1_size = cpu_table[i].l1_size;
            l2_size = cpu_table[i].l2_size;
            target_flags &= ~(MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
            target_flags |= cpu_table[i].flags;
            break;
          }
      if (i == ct_size)
        error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string);
    }

  if (alpha_tune_string)
    {
      for (i = 0; i < ct_size; i++)
        if (! strcmp (alpha_tune_string, cpu_table[i].name))
          {
            alpha_tune = cpu_table[i].processor;
            line_size = cpu_table[i].line_size;
            l1_size = cpu_table[i].l1_size;
            l2_size = cpu_table[i].l2_size;
            break;
          }
      if (i == ct_size)
        error ("bad value %qs for %<-mtune%> switch", alpha_tune_string);
    }

  if (line_size)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_l1_cache_line_size, line_size);
  if (l1_size)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_l1_cache_size, l1_size);
  if (l2_size)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_l2_cache_size, l2_size);

  /* Do some sanity checks on the above options.  */

  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
    {
      warning (0, "fp software completion requires %<-mtrap-precision=i%>");
      alpha_tp = ALPHA_TP_INSN;
    }

  if (alpha_cpu == PROCESSOR_EV6)
    {
      /* Except for EV6 pass 1 (not released), we always have precise
         arithmetic traps.  Which means we can do software completion
         without minding trap shadows.  */
      alpha_tp = ALPHA_TP_PROG;
    }

  if (TARGET_FLOAT_VAX)
    {
      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
        {
          warning (0, "rounding mode not supported for VAX floats");
          alpha_fprm = ALPHA_FPRM_NORM;
        }
      if (alpha_fptm == ALPHA_FPTM_SUI)
        {
          warning (0, "trap mode not supported for VAX floats");
          alpha_fptm = ALPHA_FPTM_SU;
        }
      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
        warning (0, "128-bit %<long double%> not supported for VAX floats");
      target_flags &= ~MASK_LONG_DOUBLE_128;
    }

  {
    char *end;
    int lat;

    if (!alpha_mlat_string)
      alpha_mlat_string = "L1";

    if (ISDIGIT ((unsigned char) alpha_mlat_string[0])
        && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
      ;
    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
             && ISDIGIT ((unsigned char) alpha_mlat_string[1])
             && alpha_mlat_string[2] == '\0')
      {
        static int const cache_latency[][4] =
        {
          { 3, 30, -1 },        /* ev4 -- Bcache is a guess */
          { 2, 12, 38 },        /* ev5 -- Bcache from PC164 LMbench numbers */
          { 3, 12, 30 },        /* ev6 -- Bcache from DS20 LMbench.  */
        };

        lat = alpha_mlat_string[1] - '0';
        if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat - 1] == -1)
          {
            warning (0, "L%d cache latency unknown for %s",
                     lat, alpha_cpu_name[alpha_tune]);
            lat = 3;
          }
        else
          lat = cache_latency[alpha_tune][lat - 1];
      }
    else if (! strcmp (alpha_mlat_string, "main"))
      {
        /* Most current memories have about 370ns latency.  This is
           a reasonable guess for a fast cpu.  */
        lat = 150;
      }
    else
      {
        warning (0, "bad value %qs for %<-mmemory-latency%>",
                 alpha_mlat_string);
        lat = 3;
      }

    alpha_memory_latency = lat;
  }
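  /* A worked example of the logic above, illustrative only: with
     -mtune=ev5 -mmemory-latency=L2, lat starts as 2 and indexes
     cache_latency[PROCESSOR_EV5][1], so alpha_memory_latency becomes
     12.  An unknown level, such as L3 on ev4, falls back to 3 with a
     warning.  */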
  /* Default the definition of "small data" to 8 bytes.  */
  if (!OPTION_SET_P (g_switch_value))
    g_switch_value = 8;

  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
  if (flag_pic == 1)
    target_flags |= MASK_SMALL_DATA;
  else if (flag_pic == 2)
    target_flags &= ~MASK_SMALL_DATA;

  alpha_override_options_after_change ();

  /* Register variables and functions with the garbage collector.  */

  /* Set up function hooks.  */
  init_machine_status = alpha_init_machine_status;

  /* Tell the compiler when we're using VAX floating point.  */
  if (TARGET_FLOAT_VAX)
    {
      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
      REAL_MODE_FORMAT (TFmode) = NULL;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif
}

/* Implement targetm.override_options_after_change.  */

static void
alpha_override_options_after_change (void)
{
  /* Align labels and loops for optimal branching.  */
  /* ??? Kludge these by not doing anything if we don't optimize.  */
  if (optimize > 0)
    {
      if (flag_align_loops && !str_align_loops)
        str_align_loops = "16";
      if (flag_align_jumps && !str_align_jumps)
        str_align_jumps = "16";
    }
  if (flag_align_functions && !str_align_functions)
    str_align_functions = "16";
}

/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */

int
zap_mask (HOST_WIDE_INT value)
{
  int i;

  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
       i++, value >>= 8)
    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
      return 0;

  return 1;
}
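/* For illustration: zap_mask accepts values such as 0xffffffff00000000
   and 0x00ff00ff00ff00ff, in which every byte is 0x00 or 0xff, and
   rejects 0x0000000000001200, whose byte 0x12 is neither.  */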
/* Return true if OP is valid for a particular TLS relocation.
   We are already guaranteed that OP is a CONST.  */

int
tls_symbolic_operand_1 (rtx op, int size, int unspec)
{
  op = XEXP (op, 0);

  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
    return 0;
  op = XVECEXP (op, 0, 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  switch (SYMBOL_REF_TLS_MODEL (op))
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
    case TLS_MODEL_INITIAL_EXEC:
      return unspec == UNSPEC_TPREL && size == 64;
    case TLS_MODEL_LOCAL_EXEC:
      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
    default:
      gcc_unreachable ();
    }
}
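/* An illustrative example of the operand shape matched above: a
   local-exec reference to a thread-local variable "x" arrives here as
     (const (unspec [(symbol_ref "x")] UNSPEC_TPREL))
   and is accepted when the relocation size requested by the caller
   matches alpha_tls_size.  */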
/* Used by aligned_memory_operand and unaligned_memory_operand to
   resolve what reload is going to do with OP if it's a register.  */

rtx
resolve_reload_operand (rtx op)
{
  if (reload_in_progress)
    {
      rtx tmp = op;
      if (SUBREG_P (tmp))
        tmp = SUBREG_REG (tmp);
      if (REG_P (tmp)
          && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
        {
          op = reg_equiv_memory_loc (REGNO (tmp));
          if (op == 0)
            return 0;
        }
    }
  return op;
}

/* The scalar modes supported differ from the default check-what-c-supports
   version in that sometimes TFmode is available even when long double
   indicates only DFmode.  */

static bool
alpha_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode: /* via optabs.cc */
      return true;

    case E_SFmode:
    case E_DFmode:
      return true;

    case E_TFmode:
      return TARGET_HAS_XFLOATING_LIBS;

    default:
      return false;
    }
}

/* Alpha implements a couple of integer vector mode operations when
   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
   which allows the vectorizer to operate on e.g. move instructions,
   or when expand_vector_operations can do something useful.  */

static bool
alpha_vector_mode_supported_p (machine_mode mode)
{
  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
}

/* Return the TLS model to use for SYMBOL.  */

static enum tls_model
tls_symbolic_operand_type (rtx symbol)
{
  enum tls_model model;

  if (GET_CODE (symbol) != SYMBOL_REF)
    return TLS_MODEL_NONE;
  model = SYMBOL_REF_TLS_MODEL (symbol);

  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
    model = TLS_MODEL_INITIAL_EXEC;

  return model;
}

/* Return true if the function DECL will share the same GP as any
   function in the current unit of translation.  */

static bool
decl_has_samegp (const_tree decl)
{
  /* Functions that are not local can be overridden, and thus may
     not share the same gp.  */
  if (!(*targetm.binds_local_p) (decl))
    return false;

  /* If -msmall-data is in effect, assume that there is only one GP
     for the module, and so any local symbol has this property.  We
     need explicit relocations to be able to enforce this for symbols
     not defined in this unit of translation, however.  */
  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
    return true;

  /* Functions that are not external are defined in this UoT.  */
  /* ??? Irritatingly, static functions not yet emitted are still
     marked "external".  Apply this to non-static functions only.  */
  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
}

/* Return true if EXP should be placed in the small data section.  */

static bool
alpha_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  Duh.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* COMMON symbols are never small data.  */
  if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".sdata") == 0
          || strcmp (section, ".sbss") == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= g_switch_value)
        return true;
    }

  return false;
}
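/* A worked example, illustrative only: with the default -G 8 set in
   alpha_option_override, "int x;" (4 bytes) lands in the small data
   area while "double a[2]" (16 bytes) does not; a variable explicitly
   placed in ".sdata" or ".sbss" qualifies regardless of size.  */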
#if TARGET_ABI_OPEN_VMS
static bool
vms_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || mode == DImode);
}

static bool
alpha_linkage_symbol_p (const char *symname)
{
  int symlen = strlen (symname);

  if (symlen > 4)
    return strcmp (&symname[symlen - 4], "..lk") == 0;

  return false;
}

#define LINKAGE_SYMBOL_REF_P(X) \
  ((GET_CODE (X) == SYMBOL_REF \
    && alpha_linkage_symbol_p (XSTR (X, 0))) \
   || (GET_CODE (X) == CONST \
       && GET_CODE (XEXP (X, 0)) == PLUS \
       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
#endif

/* legitimate_address_p recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   For Alpha, we have either a constant address or the sum of a
   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
   low-order three bits; this is an "unaligned" access.  */

static bool
alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  /* If this is an ldq_u type address, discard the outer AND.  */
  if (mode == DImode
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -8)
    x = XEXP (x, 0);

  /* Discard non-paradoxical subregs.  */
  if (SUBREG_P (x)
      && (GET_MODE_SIZE (GET_MODE (x))
          < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
    x = SUBREG_REG (x);

  /* Unadorned general registers are valid.  */
  if (REG_P (x)
      && (strict
          ? STRICT_REG_OK_FOR_BASE_P (x)
          : NONSTRICT_REG_OK_FOR_BASE_P (x)))
    return true;

  /* Constant addresses (i.e. +/- 32k) are valid.  */
  if (CONSTANT_ADDRESS_P (x))
    return true;

#if TARGET_ABI_OPEN_VMS
  if (LINKAGE_SYMBOL_REF_P (x))
    return true;
#endif

  /* Register plus a small constant offset is valid.  */
  if (GET_CODE (x) == PLUS)
    {
      rtx ofs = XEXP (x, 1);
      x = XEXP (x, 0);

      /* Discard non-paradoxical subregs.  */
      if (SUBREG_P (x)
          && (GET_MODE_SIZE (GET_MODE (x))
              < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
        x = SUBREG_REG (x);

      if (REG_P (x))
        {
          if (! strict
              && NONSTRICT_REG_OK_FP_BASE_P (x)
              && CONST_INT_P (ofs))
            return true;
          if ((strict
               ? STRICT_REG_OK_FOR_BASE_P (x)
               : NONSTRICT_REG_OK_FOR_BASE_P (x))
              && CONSTANT_ADDRESS_P (ofs))
            return true;
        }
    }

  /* If we're managing explicit relocations, LO_SUM is valid, as are small
     data symbols.  Avoid explicit relocations of modes larger than word
     mode, since e.g. $LC0+8($1) can fold around a +/- 32k offset.  */
  else if (TARGET_EXPLICIT_RELOCS
           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
    {
      if (small_symbolic_operand (x, Pmode))
        return true;

      if (GET_CODE (x) == LO_SUM)
        {
          rtx ofs = XEXP (x, 1);
          x = XEXP (x, 0);

          /* Discard non-paradoxical subregs.  */
          if (SUBREG_P (x)
              && (GET_MODE_SIZE (GET_MODE (x))
                  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
            x = SUBREG_REG (x);

          /* Must have a valid base register.  */
          if (! (REG_P (x)
                 && (strict
                     ? STRICT_REG_OK_FOR_BASE_P (x)
                     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
            return false;

          /* The symbol must be local.  */
          if (local_symbolic_operand (ofs, Pmode)
              || dtp32_symbolic_operand (ofs, Pmode)
              || tp32_symbolic_operand (ofs, Pmode))
            return true;
        }
    }

  return false;
}
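/* Some address forms accepted above, for illustration:
     (reg $16)                                     plain base register
     (plus (reg $16) (const_int 32000))            base plus 16-bit offset
     (and (plus (reg $16) (const_int 5)) (const_int -8))
                                                   DImode ldq_u access
     (lo_sum (reg $29) (symbol_ref "local"))       explicit relocations  */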
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */

static rtx
alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
{
  HOST_WIDE_INT addend;

  /* If the address is (plus reg const_int) and the CONST_INT is not a
     valid offset, compute the high part of the constant and add it to
     the register.  Then our address is (plus temp low-part-const).  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
    {
      addend = INTVAL (XEXP (x, 1));
      x = XEXP (x, 0);
      goto split_addend;
    }

  /* If the address is (const (plus FOO const_int)), find the low-order
     part of the CONST_INT.  Then load FOO plus any high-order part of the
     CONST_INT into a register.  Our address is (plus reg low-part-const).
     This is done to reduce the number of GOT entries.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (x, 0), 1));
      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
      goto split_addend;
    }

  /* If we have a (plus reg const), emit the load as in (2), then add
     the two registers, and finally generate (plus reg low-part-const) as
     our address.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && GET_CODE (XEXP (x, 1)) == CONST
      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
                               XEXP (XEXP (XEXP (x, 1), 0), 0),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      goto split_addend;
    }

  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
     Avoid modes larger than word mode, since e.g. $LC0+8($1) can fold
     around a +/- 32k offset.  */
  if (TARGET_EXPLICIT_RELOCS
      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
      && symbolic_operand (x, Pmode))
    {
      rtx r0, r16, eqv, tga, tp, dest, seq;
      rtx_insn *insn;

      switch (tls_symbolic_operand_type (x))
        {
        case TLS_MODEL_NONE:
          break;

        case TLS_MODEL_GLOBAL_DYNAMIC:
          {
            start_sequence ();

            r0 = gen_rtx_REG (Pmode, 0);
            r16 = gen_rtx_REG (Pmode, 16);
            tga = get_tls_get_addr ();
            dest = gen_reg_rtx (Pmode);
            seq = GEN_INT (alpha_next_sequence_number++);

            emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
            rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
            insn = emit_call_insn (val);
            RTL_CONST_CALL_P (insn) = 1;
            use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

            insn = get_insns ();
            end_sequence ();

            emit_libcall_block (insn, dest, r0, x);
            return dest;
          }

        case TLS_MODEL_LOCAL_DYNAMIC:
          {
            start_sequence ();

            r0 = gen_rtx_REG (Pmode, 0);
            r16 = gen_rtx_REG (Pmode, 16);
            tga = get_tls_get_addr ();
            scratch = gen_reg_rtx (Pmode);
            seq = GEN_INT (alpha_next_sequence_number++);

            emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
            rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
            insn = emit_call_insn (val);
            RTL_CONST_CALL_P (insn) = 1;
            use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

            insn = get_insns ();
            end_sequence ();

            eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                  UNSPEC_TLSLDM_CALL);
            emit_libcall_block (insn, scratch, r0, eqv);

            eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
            eqv = gen_rtx_CONST (Pmode, eqv);

            if (alpha_tls_size == 64)
              {
                dest = gen_reg_rtx (Pmode);
                emit_insn (gen_rtx_SET (dest, eqv));
                emit_insn (gen_adddi3 (dest, dest, scratch));
                return dest;
              }
            if (alpha_tls_size == 32)
              {
                rtx temp = gen_rtx_HIGH (Pmode, eqv);
                temp = gen_rtx_PLUS (Pmode, scratch, temp);
                scratch = gen_reg_rtx (Pmode);
                emit_insn (gen_rtx_SET (scratch, temp));
              }
            return gen_rtx_LO_SUM (Pmode, scratch, eqv);
          }

        case TLS_MODEL_INITIAL_EXEC:
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
          eqv = gen_rtx_CONST (Pmode, eqv);
          tp = gen_reg_rtx (Pmode);
          scratch = gen_reg_rtx (Pmode);
          dest = gen_reg_rtx (Pmode);

          emit_insn (gen_get_thread_pointerdi (tp));
          emit_insn (gen_rtx_SET (scratch, eqv));
          emit_insn (gen_adddi3 (dest, tp, scratch));
          return dest;

        case TLS_MODEL_LOCAL_EXEC:
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
          eqv = gen_rtx_CONST (Pmode, eqv);
          tp = gen_reg_rtx (Pmode);

          emit_insn (gen_get_thread_pointerdi (tp));
          if (alpha_tls_size == 32)
            {
              rtx temp = gen_rtx_HIGH (Pmode, eqv);
              temp = gen_rtx_PLUS (Pmode, tp, temp);
              tp = gen_reg_rtx (Pmode);
              emit_insn (gen_rtx_SET (tp, temp));
            }
          return gen_rtx_LO_SUM (Pmode, tp, eqv);

        default:
          gcc_unreachable ();
        }

      if (local_symbolic_operand (x, Pmode))
        {
          if (small_symbolic_operand (x, Pmode))
            return x;
          else
            {
              if (can_create_pseudo_p ())
                scratch = gen_reg_rtx (Pmode);
              emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
              return gen_rtx_LO_SUM (Pmode, scratch, x);
            }
        }
    }

  return NULL;

 split_addend:
  {
    HOST_WIDE_INT low, high;

    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
    addend -= low;
    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
    addend -= high;

    if (addend)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
                               (!can_create_pseudo_p () ? scratch : NULL_RTX),
                               1, OPTAB_LIB_WIDEN);
    if (high)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
                               (!can_create_pseudo_p () ? scratch : NULL_RTX),
                               1, OPTAB_LIB_WIDEN);

    return plus_constant (Pmode, x, low);
  }
}
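/* A worked example of the split_addend path above, illustrative only:
   legitimizing (plus (reg) (const_int 0x12348765)) computes
     low  = ((0x8765 ^ 0x8000) - 0x8000) = -0x789b
     high = 0x12348765 - low             = 0x12350000
   leaving (plus (plus (reg) 0x12350000) -0x789b): one ldah plus a
   16-bit displacement that folds into the memory reference.  */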
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  Return X or the new, valid address.  */

static rtx
alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                          machine_mode mode)
{
  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
  return new_x ? new_x : x;
}

/* Return true if ADDR has an effect that depends on the machine mode it
   is used for.  On the Alpha this is true only for the unaligned modes.
   We can simplify the test since we know that the address must be valid.  */

static bool
alpha_mode_dependent_address_p (const_rtx addr,
                                addr_space_t as ATTRIBUTE_UNUSED)
{
  return GET_CODE (addr) == AND;
}

/* Primarily this is required for TLS symbols, but given that our move
   patterns *ought* to be able to handle any symbol at any time, we
   should never be spilling symbolic operands to the constant pool, ever.  */

static bool
alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
}

/* We do not allow indirect calls to be optimized into sibling calls, nor
   can we allow a call to a function with a different GP to be optimized
   into a sibcall.  */

static bool
alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Can't do indirect tail calls, since we don't know if the target
     uses the same GP.  */
  if (!decl)
    return false;

  /* Otherwise, we can make a tail call if the target function shares
     the same GP.  */
  return decl_has_samegp (decl);
}

bool
some_small_symbolic_operand_int (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
    {
      rtx x = *iter;
      /* Don't re-split.  */
      if (GET_CODE (x) == LO_SUM)
        iter.skip_subrtxes ();
      else if (small_symbolic_operand (x, Pmode))
        return true;
    }
  return false;
}

rtx
split_small_symbolic_operand (rtx x)
{
  x = copy_insn (x);
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
    {
      rtx *ptr = *iter;
      rtx x = *ptr;
      /* Don't re-split.  */
      if (GET_CODE (x) == LO_SUM)
        iter.skip_subrtxes ();
      else if (small_symbolic_operand (x, Pmode))
        {
          *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
          iter.skip_subrtxes ();
        }
    }
  return x;
}

/* Indicate that INSN cannot be duplicated.  This is true for any insn
   that we've marked with gpdisp relocs, since those have to stay in
   1-1 correspondence with one another.

   Technically we could copy them if we could set up a mapping from one
   sequence number to another, across the set of insns to be duplicated.
   This seems overly complicated and error-prone since interblock motion
   from sched-ebb could move one of the pair of insns to a different block.

   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
   then they'll be in a different block from their ldgp.  Which could lead
   the bb reorder code to think that it would be ok to copy just the block
   containing the call and branch to the block containing the ldgp.  */

static bool
alpha_cannot_copy_insn_p (rtx_insn *insn)
{
  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
    return false;
  if (recog_memoized (insn) >= 0)
    return get_attr_cannot_copy (insn);
  else
    return false;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
alpha_legitimize_reload_address (rtx x,
                                 machine_mode mode ATTRIBUTE_UNUSED,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by
     splitting the addend across an ldah and the mem insn.  This
     cuts the number of extra insns needed from 3 to 1.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
      && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT high
        = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow.  */
      if (high + low != val)
        return NULL_RTX;

      /* Reload the high part into a base reg; leave the low part
         in the mem directly.  */
      x = gen_rtx_PLUS (GET_MODE (x),
                        gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
                                      GEN_INT (high)),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
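/* For illustration: reloading (plus (reg $9) (const_int 0x10010))
   splits as high = 0x10000, low = 0x10, so the high part becomes a
   single ldah into the reloaded base register while the mem keeps a
   16($base) displacement -- one extra insn instead of a three-insn
   constant load.  */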
/* Return the cost of moving between registers of various classes.  Moving
   between FLOAT_REGS and anything else except float regs is expensive.
   In fact, we make it quite expensive because we really don't want to
   do these moves unless it is clearly worth it.  Optimizations may
   reduce the impact of not being able to allocate a pseudo to a
   hard register.  */

static int
alpha_register_move_cost (machine_mode /*mode*/,
                          reg_class_t from, reg_class_t to)
{
  if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
    return 2;

  if (TARGET_FIX)
    return (from == FLOAT_REGS) ? 6 : 8;

  return 4 + 2 * alpha_memory_latency;
}

/* Return the cost of moving data of MODE from a register to
   or from memory.  On the Alpha, bump this up a bit.  */

static int
alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
                        bool /*in*/)
{
  return 2 * alpha_memory_latency;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
                 bool speed)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);
  const struct alpha_rtx_cost_data *cost_data;

  if (!speed)
    cost_data = &alpha_rtx_cost_size;
  else
    cost_data = &alpha_rtx_cost_data[alpha_tune];

  switch (code)
    {
    case CONST_INT:
      /* If this is an 8-bit constant, return zero since it can be used
         nearly anywhere with no cost.  If it is a valid operand for an
         ADD or AND, likewise return 0 if we know it will be used in that
         context.  Otherwise, return 2 since it might be used there later.
         All other constants take at least two insns.  */
      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
        {
          *total = 0;
          return true;
        }
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
      if (x == CONST0_RTX (mode))
        *total = 0;
      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
               || (outer_code == AND && and_operand (x, VOIDmode)))
        *total = 0;
      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
        *total = 2;
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
        *total = COSTS_N_INSNS (outer_code != MEM);
      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
        *total = COSTS_N_INSNS (1 + (outer_code != MEM));
      else if (tls_symbolic_operand_type (x))
        /* Estimate of cost for call_pal rduniq.  */
        /* ??? How many insns do we emit here?  More than one...  */
        *total = COSTS_N_INSNS (15);
      else
        /* Otherwise we do a load from the GOT.  */
        *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case HIGH:
      /* This is effectively an add_operand.  */
      *total = 2;
      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
        *total = cost_data->fp_add;
      else if (GET_CODE (XEXP (x, 0)) == ASHIFT
               && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
        {
          *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                              (enum rtx_code) outer_code, opno, speed)
                    + rtx_cost (XEXP (x, 1), mode,
                                (enum rtx_code) outer_code, opno, speed)
                    + COSTS_N_INSNS (1));
          return true;
        }
      return false;

    case MULT:
      if (float_mode_p)
        *total = cost_data->fp_mult;
      else if (mode == DImode)
        *total = cost_data->int_mult_di;
      else
        *total = cost_data->int_mult_si;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
          && INTVAL (XEXP (x, 1)) <= 3)
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }
      /* FALLTHRU */

    case ASHIFTRT:
    case LSHIFTRT:
      *total = cost_data->int_shift;
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
        *total = cost_data->fp_add;
      else
        *total = cost_data->int_cmov;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (!float_mode_p)
        *total = cost_data->int_div;
      else if (mode == SFmode)
        *total = cost_data->fp_div_sf;
      else
        *total = cost_data->fp_div_df;
      return false;

    case MEM:
      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case NEG:
      if (! float_mode_p)
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }
      /* FALLTHRU */

    case ABS:
      if (! float_mode_p)
        {
          *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
          return false;
        }
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = cost_data->fp_add;
      return false;

    case FLOAT_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        *total = 0;
      else
        *total = cost_data->fp_add;
      return false;

    default:
      return false;
    }
}

/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  SCRATCH is a free register for use in reloading out
   of range stack slots.  */

void
get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
{
  rtx base;
  HOST_WIDE_INT disp, offset;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
  else
    disp = 0;

  /* Find the byte offset within an aligned word.  If the memory itself is
     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
     will have examined the base register and determined it is aligned, and
     thus displacements from it are naturally alignable.  */
  if (MEM_ALIGN (ref) >= 32)
    offset = 0;
  else
    offset = disp & 3;

  /* The location should not cross an aligned word boundary.  */
  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
              <= GET_MODE_SIZE (SImode));

  /* Access the entire aligned word.  */
  *paligned_mem = widen_memory_access (ref, SImode, -offset);

  /* Convert the byte offset within the word to a bit offset.  */
  offset *= BITS_PER_UNIT;
  *pbitnum = GEN_INT (offset);
}
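/* A worked example, illustrative only: for an HImode reference at
   displacement 6 from a not-known-aligned base, offset = 6 & 3 = 2, so
   *paligned_mem covers the SImode word at displacement 4 and *pbitnum
   is 16: the halfword occupies bits 16..31 of the aligned word.  */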
/* Similar, but just get the address.  Handle the two reload cases.  */

rtx
get_unaligned_address (rtx ref)
{
  rtx base;
  HOST_WIDE_INT offset = 0;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);

  return plus_constant (Pmode, base, offset);
}

/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
   X is always returned in a register.  */

rtx
get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
{
  if (GET_CODE (addr) == PLUS)
    {
      ofs += INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
}

/* On the Alpha, all (non-symbolic) constants except zero go into
   a floating-point register via memory.  Note that we cannot
   return anything that is not a subset of RCLASS, and that some
   symbolic constants cannot be dropped to memory.  */

enum reg_class
alpha_preferred_reload_class (rtx x, enum reg_class rclass)
{
  /* Zero is present in any register class.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return rclass;

  /* These sorts of constants we can easily drop to memory.  */
  if (CONST_SCALAR_INT_P (x)
      || CONST_DOUBLE_P (x)
      || GET_CODE (x) == CONST_VECTOR)
    {
      if (rclass == FLOAT_REGS)
        return NO_REGS;
      if (rclass == ALL_REGS)
        return GENERAL_REGS;
      return rclass;
    }

  /* All other kinds of constants should not (and in the case of HIGH
     cannot) be dropped to memory -- instead we use a GENERAL_REGS
     secondary reload.  */
  if (CONSTANT_P (x))
    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);

  return rclass;
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static reg_class_t
alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                        machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Loading and storing HImode or QImode values to and from memory
     usually requires a scratch register.  */
  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
    {
      if (any_memory_operand (x, mode))
        {
          if (in_p)
            {
              if (!aligned_memory_operand (x, mode))
                sri->icode = direct_optab_handler (reload_in_optab, mode);
            }
          else
            sri->icode = direct_optab_handler (reload_out_optab, mode);
          return NO_REGS;
        }
    }

  /* We also cannot do integral arithmetic into FP regs, as might result
     from register elimination into a DImode fp register.  */
  if (rclass == FLOAT_REGS)
    {
      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
        return GENERAL_REGS;
      if (in_p && INTEGRAL_MODE_P (mode)
          && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
        return GENERAL_REGS;
    }

  return NO_REGS;
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   If we are copying between general and FP registers, we need a memory
   location unless the FIX extension is available.  */

static bool
alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
                               reg_class_t class2)
{
  return (!TARGET_FIX
          && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS)
              || (class2 == FLOAT_REGS && class1 != FLOAT_REGS)));
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  If MODE is
   floating-point, use it.  Otherwise, widen to a word like the default.
   This is needed because we always store integers in FP registers in
   quadword format.  This whole area is very tricky!  */

static machine_mode
alpha_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return mode;
  if (GET_MODE_SIZE (mode) >= 4)
    return mode;
  return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
}
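/* For illustration: a QImode or HImode value bounced through memory
   between the register files is widened to DImode by the hook above,
   matching the quadword format used for integers in FP registers,
   while SFmode, DFmode and SImode keep their own modes.  */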
/* Given SEQ, which is an INSN list, look for any MEMs in either
   a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
   volatile flags from REF into each of the MEMs found.  If REF is not
   a MEM, don't do anything.  */

void
alpha_set_memflags (rtx seq, rtx ref)
{
  rtx_insn *insn;

  if (!MEM_P (ref))
    return;

  /* This is only called from alpha.md, after having had something
     generated from one of the insn patterns.  So if everything is
     zero, the pattern is already up-to-date.  */
  if (!MEM_VOLATILE_P (ref)
      && !MEM_NOTRAP_P (ref)
      && !MEM_READONLY_P (ref))
    return;

  subrtx_var_iterator::array_type array;
  for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
        {
          rtx x = *iter;
          if (MEM_P (x))
            {
              MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
              MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
              MEM_READONLY_P (x) = MEM_READONLY_P (ref);
              /* Sadly, we cannot use alias sets because the extra
                 aliasing produced by the AND interferes.  Given that
                 two-byte quantities are the only thing we would be
                 able to differentiate anyway, there does not seem to
                 be any point in convoluting the early out of the
                 alias check.  */
              iter.skip_subrtxes ();
            }
        }
    else
      gcc_unreachable ();
}

static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
                                 int, bool);

/* Internal routine for alpha_emit_set_const to check for N or below insns.
   If NO_OUTPUT is true, then we only check to see if N insns are possible,
   and return pc_rtx if successful.  */

static rtx
alpha_emit_set_const_1 (rtx target, machine_mode mode,
                        HOST_WIDE_INT c, int n, bool no_output)
{
  HOST_WIDE_INT new_const;
  int i, bits;
  /* Use a pseudo if highly optimizing and still generating RTL.  */
  rtx subtarget
    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
  rtx temp, insn;

  /* If this is a sign-extended 32-bit constant, we can do this in at most
     three insns, so do it if we have enough insns left.  */

  if (c >> 31 == -1 || c >> 31 == 0)
    {
      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT tmp1 = c - low;
      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT extra = 0;

      /* If HIGH will be interpreted as negative but the constant is
         positive, we must adjust it to do two ldah insns.  */

      if ((high & 0x8000) != 0 && c >= 0)
        {
          extra = 0x4000;
          tmp1 -= 0x40000000;
          high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
        }

      if (c == low || (low == 0 && extra == 0))
        {
          /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
             but that meant that we can't handle INT_MIN on 32-bit machines
             (like NT/Alpha), because we recurse indefinitely through
             emit_move_insn to gen_movdi.  So instead, since we know exactly
             what we want, create it explicitly.  */

          if (no_output)
            return pc_rtx;
          if (target == NULL)
            target = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (target, GEN_INT (c)));
          return target;
        }
      else if (n >= 2 + (extra != 0))
        {
          if (no_output)
            return pc_rtx;
          if (!can_create_pseudo_p ())
            {
              emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
              temp = target;
            }
          else
            temp = copy_to_suggested_reg (GEN_INT (high << 16),
                                          subtarget, mode);

          /* As of 2002-02-23, addsi3 is only available when not optimizing.
             This means that if we go through expand_binop, we'll try to
             generate extensions, etc, which will require new pseudos, which
             will fail during some split phases.  The SImode add patterns
             still exist, but are not named.  So build the insns by hand.  */

          if (extra != 0)
            {
              if (! subtarget)
                subtarget = gen_reg_rtx (mode);
              insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
              insn = gen_rtx_SET (subtarget, insn);
              emit_insn (insn);
              temp = subtarget;
            }

          if (target == NULL)
            target = gen_reg_rtx (mode);
          insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
          insn = gen_rtx_SET (target, insn);
          emit_insn (insn);
          return target;
        }
    }
  /* If we couldn't do it that way, try some other methods.  But if we have
     no instructions left, don't bother.  Likewise, if this is SImode and
     we can't make pseudos, we can't do anything since the expand_binop
     and expand_unop calls will widen and try to make pseudos.  */

  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
    return 0;

  /* Next, see if we can load a related constant and then shift and possibly
     negate it to get the constant we want.  Try this once each increasing
     numbers of insns.  */

  for (i = 1; i < n; i++)
    {
      /* First, see if minus some low bits, we've an easy load of
         high bits.  */

      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
      if (new_const != 0)
        {
          temp = alpha_emit_set_const (subtarget, mode, c - new_const, i,
                                       no_output);
          if (temp)
            {
              if (no_output)
                return temp;
              return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
                                   target, 0, OPTAB_WIDEN);
            }
        }

      /* Next try complementing.  */
      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
      if (temp)
        {
          if (no_output)
            return temp;
          return expand_unop (mode, one_cmpl_optab, temp, target, 0);
        }

      /* Next try to form a constant and do a left shift.  We can do this
         if some low-order bits are zero; the exact_log2 call below tells
         us that information.  The bits we are shifting out could be any
         value, but here we'll just try the 0- and sign-extended forms of
         the constant.  To try to increase the chance of having the same
         constant in more than one insn, start at the highest number of
         bits to shift, but try all possibilities in case a ZAPNOT will
         be useful.  */

      bits = exact_log2 (c & -c);
      if (bits > 0)
        for (; bits > 0; bits--)
          {
            new_const = c >> bits;
            temp = alpha_emit_set_const (subtarget, mode, new_const,
                                         i, no_output);
            if (!temp && c < 0)
              {
                new_const = (unsigned HOST_WIDE_INT) c >> bits;
                temp = alpha_emit_set_const (subtarget, mode, new_const,
                                             i, no_output);
              }
            if (temp)
              {
                if (no_output)
                  return temp;
                return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
                                     target, 0, OPTAB_WIDEN);
              }
          }

      /* Now try high-order zero bits.  Here we try the shifted-in bits as
         all zero and all ones.  Be careful to avoid shifting outside the
         mode and to avoid shifting outside the host wide int size.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
              - floor_log2 (c) - 1);
      if (bits > 0)
        for (; bits > 0; bits--)
          {
            new_const = c << bits;
            temp = alpha_emit_set_const (subtarget, mode, new_const,
                                         i, no_output);
            if (!temp)
              {
                new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
                temp = alpha_emit_set_const (subtarget, mode, new_const,
                                             i, no_output);
              }
            if (temp)
              {
                if (no_output)
                  return temp;
                return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
                                     target, 1, OPTAB_WIDEN);
              }
          }

      /* Now try high-order 1 bits.  We get that with a sign-extension.
         But one bit isn't enough here.  Be careful to avoid shifting outside
         the mode and to avoid shifting outside the host wide int size.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
              - floor_log2 (~ c) - 2);
      if (bits > 0)
        for (; bits > 0; bits--)
          {
            new_const = c << bits;
            temp = alpha_emit_set_const (subtarget, mode, new_const,
                                         i, no_output);
            if (!temp)
              {
                new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
                temp = alpha_emit_set_const (subtarget, mode, new_const,
                                             i, no_output);
              }
            if (temp)
              {
                if (no_output)
                  return temp;
                return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
                                     target, 0, OPTAB_WIDEN);
              }
          }
    }

  /* Finally, see if we can load a value into the target that is the same as
     the constant except that all bytes that are 0 are changed to be 0xff.
     If we can, then we can do a ZAPNOT to obtain the desired constant.  */

  new_const = c;
  for (i = 0; i < 64; i += 8)
    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
      new_const |= (HOST_WIDE_INT) 0xff << i;

  /* We are only called for SImode and DImode.  If this is SImode, ensure that
     we are sign extended to a full word.  */

  if (mode == SImode)
    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;

  if (new_const != c)
    {
      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1,
                                   no_output);
      if (temp)
        {
          if (no_output)
            return temp;
          return expand_binop (mode, and_optab, temp,
                               GEN_INT (c | ~ new_const),
                               target, 0, OPTAB_WIDEN);
        }
    }

  return 0;
}
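/* A worked example of the sign-extended 32-bit case above, illustrative
   only: c = 0x7fff8000 gives low = -0x8000 and a HIGH part that would
   read as negative, so EXTRA kicks in and the sequence built is
     ldah t, 0x4000(zero)    t = 0x40000000
     ldah t, 0x4000(t)       t = 0x80000000
     lda  r, -0x8000(t)      r = 0x7fff8000
   three insns, matching the n >= 2 + (extra != 0) test.  */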
*/
1980
1981  if (mode == SImode)
1982    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1983
1984  if (new_const != c)
1985    {
1986      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1987      if (temp)
1988        {
1989          if (no_output)
1990            return temp;
1991          return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1992                               target, 0, OPTAB_WIDEN);
1993        }
1994    }
1995
1996  return 0;
1997}
1998
1999/* Try to output insns to set TARGET equal to the constant C if it can be
2000   done in less than N insns.  Do all computations in MODE.  Returns the place
2001   where the output has been placed if it can be done and the insns have been
2002   emitted.  If it would take more than N insns, zero is returned and no
2003   insns are emitted.  */
2004
2005 static rtx
2006 alpha_emit_set_const (rtx target, machine_mode mode,
2007                       HOST_WIDE_INT c, int n, bool no_output)
2008 {
2009  machine_mode orig_mode = mode;
2010  rtx orig_target = target;
2011  rtx result = 0;
2012  int i;
2013
2014  /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2015     can't load this constant in one insn, do this in DImode.  */
2016  if (!can_create_pseudo_p () && mode == SImode
2017      && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2018    {
2019      result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2020      if (result)
2021        return result;
2022
2023      target = no_output ? NULL : gen_lowpart (DImode, target);
2024      mode = DImode;
2025    }
2026  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2027    {
2028      target = no_output ? NULL : gen_lowpart (DImode, target);
2029      mode = DImode;
2030    }
2031
2032  /* Try 1 insn, then 2, then up to N.  */
2033  for (i = 1; i <= n; i++)
2034    {
2035      result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2036      if (result)
2037        {
2038          rtx_insn *insn;
2039          rtx set;
2040
2041          if (no_output)
2042            return result;
2043
2044          insn = get_last_insn ();
2045          set = single_set (insn);
2046          if (! CONSTANT_P (SET_SRC (set)))
2047            set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2048          break;
2049        }
2050    }
2051
2052  /* Allow for the case where we changed the mode of TARGET.  */
2053  if (result)
2054    {
2055      if (result == target)
2056        result = orig_target;
2057      else if (mode != orig_mode)
2058        result = gen_lowpart (orig_mode, result);
2059    }
2060
2061  return result;
2062 }
2063
2064/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2065   fall back to a straightforward decomposition.  We do this to avoid
2066   exponential run times encountered when looking for longer sequences
2067   with alpha_emit_set_const.  */
2068
2069 static rtx
2070 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2071 {
2072  HOST_WIDE_INT d1, d2, d3, d4;
2073
2074  /* Decompose the entire word.  */
2075
2076  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2077  c1 -= d1;
2078  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2079  c1 = (c1 - d2) >> 32;
2080  d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2081  c1 -= d3;
2082  d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2083  gcc_assert (c1 == d4);
2084
2085  /* Construct the high word.  */
2086  if (d4)
2087    {
2088      emit_move_insn (target, GEN_INT (d4));
2089      if (d3)
2090        emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2091    }
2092  else
2093    emit_move_insn (target, GEN_INT (d3));
2094
2095  /* Shift it into place.  */
2096  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2097
2098  /* Add in the low bits.
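
     (A worked example, with an arbitrary constant chosen for
     illustration: c1 = 0x123456789abcdef0 decomposes as d1 = -0x2110,
     d2 = -0x65430000, d3 = 0x5679, d4 = 0x12340000, so this function
     emits the five-insn sequence

        ldah  $t,0x1234($31)    # d4
        lda   $t,0x5679($t)     # d3
        sll   $t,32,$t
        ldah  $t,-0x6543($t)    # d2
        lda   $t,-0x2110($t)    # d1 )
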
*/ 2099 if (d2) 2100 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); 2101 if (d1) 2102 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); 2103 2104 return target; 2105} 2106 2107/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */ 2108 2109static HOST_WIDE_INT 2110alpha_extract_integer (rtx x) 2111{ 2112 if (GET_CODE (x) == CONST_VECTOR) 2113 x = simplify_subreg (DImode, x, GET_MODE (x), 0); 2114 2115 gcc_assert (CONST_INT_P (x)); 2116 2117 return INTVAL (x); 2118} 2119 2120/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which 2121 we are willing to load the value into a register via a move pattern. 2122 Normally this is all symbolic constants, integral constants that 2123 take three or fewer instructions, and floating-point zero. */ 2124 2125bool 2126alpha_legitimate_constant_p (machine_mode mode, rtx x) 2127{ 2128 HOST_WIDE_INT i0; 2129 2130 switch (GET_CODE (x)) 2131 { 2132 case LABEL_REF: 2133 case HIGH: 2134 return true; 2135 2136 case CONST: 2137 if (GET_CODE (XEXP (x, 0)) == PLUS 2138 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 2139 x = XEXP (XEXP (x, 0), 0); 2140 else 2141 return true; 2142 2143 if (GET_CODE (x) != SYMBOL_REF) 2144 return true; 2145 /* FALLTHRU */ 2146 2147 case SYMBOL_REF: 2148 /* TLS symbols are never valid. */ 2149 return SYMBOL_REF_TLS_MODEL (x) == 0; 2150 2151 case CONST_WIDE_INT: 2152 if (TARGET_BUILD_CONSTANTS) 2153 return true; 2154 if (x == CONST0_RTX (mode)) 2155 return true; 2156 mode = DImode; 2157 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2); 2158 i0 = CONST_WIDE_INT_ELT (x, 1); 2159 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL) 2160 return false; 2161 i0 = CONST_WIDE_INT_ELT (x, 0); 2162 goto do_integer; 2163 2164 case CONST_DOUBLE: 2165 if (x == CONST0_RTX (mode)) 2166 return true; 2167 return false; 2168 2169 case CONST_VECTOR: 2170 if (x == CONST0_RTX (mode)) 2171 return true; 2172 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) 2173 return false; 2174 if (GET_MODE_SIZE (mode) != 8) 2175 return false; 2176 /* FALLTHRU */ 2177 2178 case CONST_INT: 2179 if (TARGET_BUILD_CONSTANTS) 2180 return true; 2181 i0 = alpha_extract_integer (x); 2182 do_integer: 2183 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL; 2184 2185 default: 2186 return false; 2187 } 2188} 2189 2190/* Operand 1 is known to be a constant, and should require more than one 2191 instruction to load. Emit that multi-part load. */ 2192 2193bool 2194alpha_split_const_mov (machine_mode mode, rtx *operands) 2195{ 2196 HOST_WIDE_INT i0; 2197 rtx temp = NULL_RTX; 2198 2199 i0 = alpha_extract_integer (operands[1]); 2200 2201 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false); 2202 2203 if (!temp && TARGET_BUILD_CONSTANTS) 2204 temp = alpha_emit_set_long_const (operands[0], i0); 2205 2206 if (temp) 2207 { 2208 if (!rtx_equal_p (operands[0], temp)) 2209 emit_move_insn (operands[0], temp); 2210 return true; 2211 } 2212 2213 return false; 2214} 2215 2216/* Expand a move instruction; return true if all work is done. 2217 We don't handle non-bwx subword loads here. */ 2218 2219bool 2220alpha_expand_mov (machine_mode mode, rtx *operands) 2221{ 2222 rtx tmp; 2223 2224 /* If the output is not a register, the input must be. */ 2225 if (MEM_P (operands[0]) 2226 && ! reg_or_0_operand (operands[1], mode)) 2227 operands[1] = force_reg (mode, operands[1]); 2228 2229 /* Allow legitimize_address to perform some simplifications. 
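
     (For orientation, a sketch of the expander contract assumed here:
     the movM define_expands in alpha.md invoke this along the lines of

        if (alpha_expand_mov (DImode, operands))
          DONE;

     so returning true means the move has been emitted in full, while
     returning false lets the pattern fall through to the default move
     using the possibly-rewritten operands.)
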
*/ 2230 if (mode == Pmode && symbolic_operand (operands[1], mode)) 2231 { 2232 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); 2233 if (tmp) 2234 { 2235 if (tmp == operands[0]) 2236 return true; 2237 operands[1] = tmp; 2238 return false; 2239 } 2240 } 2241 2242 /* Early out for non-constants and valid constants. */ 2243 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) 2244 return false; 2245 2246 /* Split large integers. */ 2247 if (CONST_INT_P (operands[1]) 2248 || GET_CODE (operands[1]) == CONST_VECTOR) 2249 { 2250 if (alpha_split_const_mov (mode, operands)) 2251 return true; 2252 } 2253 2254 /* Otherwise we've nothing left but to drop the thing to memory. */ 2255 tmp = force_const_mem (mode, operands[1]); 2256 2257 if (tmp == NULL_RTX) 2258 return false; 2259 2260 if (reload_in_progress) 2261 { 2262 emit_move_insn (operands[0], XEXP (tmp, 0)); 2263 operands[1] = replace_equiv_address (tmp, operands[0]); 2264 } 2265 else 2266 operands[1] = validize_mem (tmp); 2267 return false; 2268} 2269 2270/* Expand a non-bwx QImode or HImode move instruction; 2271 return true if all work is done. */ 2272 2273bool 2274alpha_expand_mov_nobwx (machine_mode mode, rtx *operands) 2275{ 2276 rtx seq; 2277 2278 /* If the output is not a register, the input must be. */ 2279 if (MEM_P (operands[0])) 2280 operands[1] = force_reg (mode, operands[1]); 2281 2282 /* Handle four memory cases, unaligned and aligned for either the input 2283 or the output. The only case where we can be called during reload is 2284 for aligned loads; all other cases require temporaries. */ 2285 2286 if (any_memory_operand (operands[1], mode)) 2287 { 2288 if (aligned_memory_operand (operands[1], mode)) 2289 { 2290 if (reload_in_progress) 2291 { 2292 seq = gen_reload_in_aligned (mode, operands[0], operands[1]); 2293 emit_insn (seq); 2294 } 2295 else 2296 { 2297 rtx aligned_mem, bitnum; 2298 rtx scratch = gen_reg_rtx (SImode); 2299 rtx subtarget; 2300 bool copyout; 2301 2302 get_aligned_mem (operands[1], &aligned_mem, &bitnum); 2303 2304 subtarget = operands[0]; 2305 if (REG_P (subtarget)) 2306 subtarget = gen_lowpart (DImode, subtarget), copyout = false; 2307 else 2308 subtarget = gen_reg_rtx (DImode), copyout = true; 2309 2310 if (mode == QImode) 2311 seq = gen_aligned_loadqi (subtarget, aligned_mem, 2312 bitnum, scratch); 2313 else 2314 seq = gen_aligned_loadhi (subtarget, aligned_mem, 2315 bitnum, scratch); 2316 emit_insn (seq); 2317 2318 if (copyout) 2319 emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); 2320 } 2321 } 2322 else 2323 { 2324 /* Don't pass these as parameters since that makes the generated 2325 code depend on parameter evaluation order which will cause 2326 bootstrap failures. 
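
             (The hazard in miniature: in a call such as

                seq = gen_unaligned_loadqi (subtarget, ua,
                                            gen_reg_rtx (DImode),
                                            gen_reg_rtx (DImode));

             C leaves the order of the two gen_reg_rtx calls unspecified,
             so the pseudo numbering -- and with it the exact insn stream
             -- could differ from host compiler to host compiler, which a
             bootstrap comparison would flag.)
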
*/ 2327 2328 rtx temp1, temp2, subtarget, ua; 2329 bool copyout; 2330 2331 temp1 = gen_reg_rtx (DImode); 2332 temp2 = gen_reg_rtx (DImode); 2333 2334 subtarget = operands[0]; 2335 if (REG_P (subtarget)) 2336 subtarget = gen_lowpart (DImode, subtarget), copyout = false; 2337 else 2338 subtarget = gen_reg_rtx (DImode), copyout = true; 2339 2340 ua = get_unaligned_address (operands[1]); 2341 if (mode == QImode) 2342 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); 2343 else 2344 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); 2345 2346 alpha_set_memflags (seq, operands[1]); 2347 emit_insn (seq); 2348 2349 if (copyout) 2350 emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); 2351 } 2352 return true; 2353 } 2354 2355 if (any_memory_operand (operands[0], mode)) 2356 { 2357 if (aligned_memory_operand (operands[0], mode)) 2358 { 2359 rtx aligned_mem, bitnum; 2360 rtx temp1 = gen_reg_rtx (SImode); 2361 rtx temp2 = gen_reg_rtx (SImode); 2362 2363 get_aligned_mem (operands[0], &aligned_mem, &bitnum); 2364 2365 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, 2366 temp1, temp2)); 2367 } 2368 else 2369 { 2370 rtx temp1 = gen_reg_rtx (DImode); 2371 rtx temp2 = gen_reg_rtx (DImode); 2372 rtx temp3 = gen_reg_rtx (DImode); 2373 rtx ua = get_unaligned_address (operands[0]); 2374 2375 seq = gen_unaligned_store 2376 (mode, ua, operands[1], temp1, temp2, temp3); 2377 2378 alpha_set_memflags (seq, operands[0]); 2379 emit_insn (seq); 2380 } 2381 return true; 2382 } 2383 2384 return false; 2385} 2386 2387/* Implement the movmisalign patterns. One of the operands is a memory 2388 that is not naturally aligned. Emit instructions to load it. */ 2389 2390void 2391alpha_expand_movmisalign (machine_mode mode, rtx *operands) 2392{ 2393 /* Honor misaligned loads, for those we promised to do so. */ 2394 if (MEM_P (operands[1])) 2395 { 2396 rtx tmp; 2397 2398 if (register_operand (operands[0], mode)) 2399 tmp = operands[0]; 2400 else 2401 tmp = gen_reg_rtx (mode); 2402 2403 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0); 2404 if (tmp != operands[0]) 2405 emit_move_insn (operands[0], tmp); 2406 } 2407 else if (MEM_P (operands[0])) 2408 { 2409 if (!reg_or_0_operand (operands[1], mode)) 2410 operands[1] = force_reg (mode, operands[1]); 2411 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); 2412 } 2413 else 2414 gcc_unreachable (); 2415} 2416 2417/* Generate an unsigned DImode to FP conversion. This is the same code 2418 optabs would emit if we didn't have TFmode patterns. 2419 2420 For SFmode, this is the only construction I've found that can pass 2421 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode 2422 intermediates will work, because you'll get intermediate rounding 2423 that ruins the end result. Some of this could be fixed by turning 2424 on round-to-positive-infinity, but that requires diddling the fpsr, 2425 which kills performance. I tried turning this around and converting 2426 to a negative number, so that I could turn on /m, but either I did 2427 it wrong or there's something else cause I wound up with the exact 2428 same single-bit error. 
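    (One concrete double-rounding instance of the sort that bites here,
    with the value picked purely for illustration: converting
    2^53 + 2^29 + 1 to SFmode through a DFmode intermediate first rounds
    to 2^53 + 2^29, which then lies exactly halfway and ties to even,
    collapsing to 2^53; a single direct rounding would give 2^53 + 2^30,
    so the two results differ in the last bit.)
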
There is a branch-less form of this same code: 2429 2430 srl $16,1,$1 2431 and $16,1,$2 2432 cmplt $16,0,$3 2433 or $1,$2,$2 2434 cmovge $16,$16,$2 2435 itoft $3,$f10 2436 itoft $2,$f11 2437 cvtqs $f11,$f11 2438 adds $f11,$f11,$f0 2439 fcmoveq $f10,$f11,$f0 2440 2441 I'm not using it because it's the same number of instructions as 2442 this branch-full form, and it has more serialized long latency 2443 instructions on the critical path. 2444 2445 For DFmode, we can avoid rounding errors by breaking up the word 2446 into two pieces, converting them separately, and adding them back: 2447 2448 LC0: .long 0,0x5f800000 2449 2450 itoft $16,$f11 2451 lda $2,LC0 2452 cmplt $16,0,$1 2453 cpyse $f11,$f31,$f10 2454 cpyse $f31,$f11,$f11 2455 s4addq $1,$2,$1 2456 lds $f12,0($1) 2457 cvtqt $f10,$f10 2458 cvtqt $f11,$f11 2459 addt $f12,$f10,$f0 2460 addt $f0,$f11,$f0 2461 2462 This doesn't seem to be a clear-cut win over the optabs form. 2463 It probably all depends on the distribution of numbers being 2464 converted -- in the optabs form, all but high-bit-set has a 2465 much lower minimum execution time. */ 2466 2467void 2468alpha_emit_floatuns (rtx operands[2]) 2469{ 2470 rtx neglab, donelab, i0, i1, f0, in, out; 2471 machine_mode mode; 2472 2473 out = operands[0]; 2474 in = force_reg (DImode, operands[1]); 2475 mode = GET_MODE (out); 2476 neglab = gen_label_rtx (); 2477 donelab = gen_label_rtx (); 2478 i0 = gen_reg_rtx (DImode); 2479 i1 = gen_reg_rtx (DImode); 2480 f0 = gen_reg_rtx (mode); 2481 2482 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 2483 2484 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); 2485 emit_jump_insn (gen_jump (donelab)); 2486 emit_barrier (); 2487 2488 emit_label (neglab); 2489 2490 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 2491 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 2492 emit_insn (gen_iordi3 (i0, i0, i1)); 2493 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); 2494 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); 2495 2496 emit_label (donelab); 2497} 2498 2499/* Generate the comparison for a conditional branch. */ 2500 2501void 2502alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode) 2503{ 2504 enum rtx_code cmp_code, branch_code; 2505 machine_mode branch_mode = VOIDmode; 2506 enum rtx_code code = GET_CODE (operands[0]); 2507 rtx op0 = operands[1], op1 = operands[2]; 2508 rtx tem; 2509 2510 if (cmp_mode == TFmode) 2511 { 2512 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2513 op1 = const0_rtx; 2514 cmp_mode = DImode; 2515 } 2516 2517 /* The general case: fold the comparison code to the types of compares 2518 that we have, choosing the branch as necessary. */ 2519 switch (code) 2520 { 2521 case EQ: case LE: case LT: case LEU: case LTU: 2522 case UNORDERED: 2523 /* We have these compares. */ 2524 cmp_code = code, branch_code = NE; 2525 break; 2526 2527 case NE: 2528 case ORDERED: 2529 /* These must be reversed. */ 2530 cmp_code = reverse_condition (code), branch_code = EQ; 2531 break; 2532 2533 case GE: case GT: case GEU: case GTU: 2534 /* For FP, we swap them, for INT, we reverse them. 
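
         (E.g. for a > b: the FP form becomes cmptlt b,a with a branch on
         nonzero, since the hardware only has eq/lt/le/un FP compares,
         while the integer form becomes cmple a,b with a branch on zero.)
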
*/
2535      if (cmp_mode == DFmode)
2536        {
2537          cmp_code = swap_condition (code);
2538          branch_code = NE;
2539          std::swap (op0, op1);
2540        }
2541      else
2542        {
2543          cmp_code = reverse_condition (code);
2544          branch_code = EQ;
2545        }
2546      break;
2547
2548    default:
2549      gcc_unreachable ();
2550    }
2551
2552  if (cmp_mode == DFmode)
2553    {
2554      if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2555        {
2556          /* When we are not as concerned about non-finite values, and we
2557             are comparing against zero, we can branch directly.  */
2558          if (op1 == CONST0_RTX (DFmode))
2559            cmp_code = UNKNOWN, branch_code = code;
2560          else if (op0 == CONST0_RTX (DFmode))
2561            {
2562              /* Undo the swap we probably did just above.  */
2563              std::swap (op0, op1);
2564              branch_code = swap_condition (cmp_code);
2565              cmp_code = UNKNOWN;
2566            }
2567        }
2568      else
2569        {
2570          /* ??? We mark the branch mode to be CCmode to prevent the
2571             compare and branch from being combined, since the compare
2572             insn follows IEEE rules that the branch does not.  */
2573          branch_mode = CCmode;
2574        }
2575    }
2576  else
2577    {
2578      /* The following optimizations are only for signed compares.  */
2579      if (code != LEU && code != LTU && code != GEU && code != GTU)
2580        {
2581          /* Whee.  Compare and branch against 0 directly.  */
2582          if (op1 == const0_rtx)
2583            cmp_code = UNKNOWN, branch_code = code;
2584
2585          /* If the constant doesn't fit into an immediate, but can
2586             be generated by lda/ldah, we adjust the argument and
2587             compare against zero, so we can use beq/bne directly.  */
2588          /* ??? Don't do this when comparing against symbols, otherwise
2589             we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2590             be declared false out of hand (at least for non-weak).  */
2591          else if (CONST_INT_P (op1)
2592                   && (code == EQ || code == NE)
2593                   && !(symbolic_operand (op0, VOIDmode)
2594                        || (REG_P (op0) && REG_POINTER (op0))))
2595            {
2596              rtx n_op1 = GEN_INT (-INTVAL (op1));
2597
2598              if (! satisfies_constraint_I (op1)
2599                  && (satisfies_constraint_K (n_op1)
2600                      || satisfies_constraint_L (n_op1)))
2601                cmp_code = PLUS, branch_code = code, op1 = n_op1;
2602            }
2603        }
2604
2605      if (!reg_or_0_operand (op0, DImode))
2606        op0 = force_reg (DImode, op0);
2607      if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2608        op1 = force_reg (DImode, op1);
2609    }
2610
2611  /* Emit an initial compare instruction, if necessary.  */
2612  tem = op0;
2613  if (cmp_code != UNKNOWN)
2614    {
2615      tem = gen_reg_rtx (cmp_mode);
2616      emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2617    }
2618
2619  /* Emit the branch instruction.  */
2620  tem = gen_rtx_SET (pc_rtx,
2621                     gen_rtx_IF_THEN_ELSE (VOIDmode,
2622                                           gen_rtx_fmt_ee (branch_code,
2623                                                           branch_mode, tem,
2624                                                           CONST0_RTX (cmp_mode)),
2625                                           gen_rtx_LABEL_REF (VOIDmode,
2626                                                              operands[3]),
2627                                           pc_rtx));
2628  emit_jump_insn (tem);
2629 }
2630
2631/* Certain simplifications can be done to make invalid setcc operations
2632   valid.  Return true if the setcc has been emitted, false if we can't work.
*/ 2633 2634bool 2635alpha_emit_setcc (rtx operands[], machine_mode cmp_mode) 2636{ 2637 enum rtx_code cmp_code; 2638 enum rtx_code code = GET_CODE (operands[1]); 2639 rtx op0 = operands[2], op1 = operands[3]; 2640 rtx tmp; 2641 2642 if (cmp_mode == TFmode) 2643 { 2644 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2645 op1 = const0_rtx; 2646 cmp_mode = DImode; 2647 } 2648 2649 if (cmp_mode == DFmode && !TARGET_FIX) 2650 return 0; 2651 2652 /* The general case: fold the comparison code to the types of compares 2653 that we have, choosing the branch as necessary. */ 2654 2655 cmp_code = UNKNOWN; 2656 switch (code) 2657 { 2658 case EQ: case LE: case LT: case LEU: case LTU: 2659 case UNORDERED: 2660 /* We have these compares. */ 2661 if (cmp_mode == DFmode) 2662 cmp_code = code, code = NE; 2663 break; 2664 2665 case NE: 2666 if (cmp_mode == DImode && op1 == const0_rtx) 2667 break; 2668 /* FALLTHRU */ 2669 2670 case ORDERED: 2671 cmp_code = reverse_condition (code); 2672 code = EQ; 2673 break; 2674 2675 case GE: case GT: case GEU: case GTU: 2676 /* These normally need swapping, but for integer zero we have 2677 special patterns that recognize swapped operands. */ 2678 if (cmp_mode == DImode && op1 == const0_rtx) 2679 break; 2680 code = swap_condition (code); 2681 if (cmp_mode == DFmode) 2682 cmp_code = code, code = NE; 2683 std::swap (op0, op1); 2684 break; 2685 2686 default: 2687 gcc_unreachable (); 2688 } 2689 2690 if (cmp_mode == DImode) 2691 { 2692 if (!register_operand (op0, DImode)) 2693 op0 = force_reg (DImode, op0); 2694 if (!reg_or_8bit_operand (op1, DImode)) 2695 op1 = force_reg (DImode, op1); 2696 } 2697 2698 /* Emit an initial compare instruction, if necessary. */ 2699 if (cmp_code != UNKNOWN) 2700 { 2701 tmp = gen_reg_rtx (cmp_mode); 2702 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode, 2703 op0, op1))); 2704 2705 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; 2706 op1 = const0_rtx; 2707 } 2708 2709 /* Emit the setcc instruction. */ 2710 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode, 2711 op0, op1))); 2712 return true; 2713} 2714 2715 2716/* Rewrite a comparison against zero CMP of the form 2717 (CODE (cc0) (const_int 0)) so it can be written validly in 2718 a conditional move (if_then_else CMP ...). 2719 If both of the operands that set cc0 are nonzero we must emit 2720 an insn to perform the compare (it can't be done within 2721 the conditional move). */ 2722 2723rtx 2724alpha_emit_conditional_move (rtx cmp, machine_mode mode) 2725{ 2726 enum rtx_code code = GET_CODE (cmp); 2727 enum rtx_code cmov_code = NE; 2728 rtx op0 = XEXP (cmp, 0); 2729 rtx op1 = XEXP (cmp, 1); 2730 machine_mode cmp_mode 2731 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); 2732 machine_mode cmov_mode = VOIDmode; 2733 int local_fast_math = flag_unsafe_math_optimizations; 2734 rtx tem; 2735 2736 if (cmp_mode == TFmode) 2737 { 2738 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2739 op1 = const0_rtx; 2740 cmp_mode = DImode; 2741 } 2742 2743 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); 2744 2745 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) 2746 { 2747 enum rtx_code cmp_code; 2748 2749 if (! TARGET_FIX) 2750 return 0; 2751 2752 /* If we have fp<->int register move instructions, do a cmov by 2753 performing the comparison in fp registers, and move the 2754 zero/nonzero value to integer registers, where we can then 2755 use a normal cmov, or vice-versa. 
*/ 2756 2757 switch (code) 2758 { 2759 case EQ: case LE: case LT: case LEU: case LTU: 2760 case UNORDERED: 2761 /* We have these compares. */ 2762 cmp_code = code, code = NE; 2763 break; 2764 2765 case NE: 2766 case ORDERED: 2767 /* These must be reversed. */ 2768 cmp_code = reverse_condition (code), code = EQ; 2769 break; 2770 2771 case GE: case GT: case GEU: case GTU: 2772 /* These normally need swapping, but for integer zero we have 2773 special patterns that recognize swapped operands. */ 2774 if (cmp_mode == DImode && op1 == const0_rtx) 2775 cmp_code = code, code = NE; 2776 else 2777 { 2778 cmp_code = swap_condition (code); 2779 code = NE; 2780 std::swap (op0, op1); 2781 } 2782 break; 2783 2784 default: 2785 gcc_unreachable (); 2786 } 2787 2788 if (cmp_mode == DImode) 2789 { 2790 if (!reg_or_0_operand (op0, DImode)) 2791 op0 = force_reg (DImode, op0); 2792 if (!reg_or_8bit_operand (op1, DImode)) 2793 op1 = force_reg (DImode, op1); 2794 } 2795 2796 tem = gen_reg_rtx (cmp_mode); 2797 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, 2798 op0, op1))); 2799 2800 cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode; 2801 op0 = gen_lowpart (cmp_mode, tem); 2802 op1 = CONST0_RTX (cmp_mode); 2803 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 2804 local_fast_math = 1; 2805 } 2806 2807 if (cmp_mode == DImode) 2808 { 2809 if (!reg_or_0_operand (op0, DImode)) 2810 op0 = force_reg (DImode, op0); 2811 if (!reg_or_8bit_operand (op1, DImode)) 2812 op1 = force_reg (DImode, op1); 2813 } 2814 2815 /* We may be able to use a conditional move directly. 2816 This avoids emitting spurious compares. */ 2817 if (signed_comparison_operator (cmp, VOIDmode) 2818 && (cmp_mode == DImode || local_fast_math) 2819 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) 2820 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 2821 2822 /* We can't put the comparison inside the conditional move; 2823 emit a compare instruction and put that inside the 2824 conditional move. Make sure we emit only comparisons we have; 2825 swap or reverse as necessary. */ 2826 2827 if (!can_create_pseudo_p ()) 2828 return NULL_RTX; 2829 2830 switch (code) 2831 { 2832 case EQ: case LE: case LT: case LEU: case LTU: 2833 case UNORDERED: 2834 /* We have these compares: */ 2835 break; 2836 2837 case NE: 2838 case ORDERED: 2839 /* These must be reversed. */ 2840 code = reverse_condition (code); 2841 cmov_code = EQ; 2842 break; 2843 2844 case GE: case GT: case GEU: case GTU: 2845 /* These normally need swapping, but for integer zero we have 2846 special patterns that recognize swapped operands. */ 2847 if (cmp_mode == DImode && op1 == const0_rtx) 2848 break; 2849 code = swap_condition (code); 2850 std::swap (op0, op1); 2851 break; 2852 2853 default: 2854 gcc_unreachable (); 2855 } 2856 2857 if (cmp_mode == DImode) 2858 { 2859 if (!reg_or_0_operand (op0, DImode)) 2860 op0 = force_reg (DImode, op0); 2861 if (!reg_or_8bit_operand (op1, DImode)) 2862 op1 = force_reg (DImode, op1); 2863 } 2864 2865 /* ??? We mark the branch mode to be CCmode to prevent the compare 2866 and cmov from being combined, since the compare insn follows IEEE 2867 rules that the cmov does not. 
*/ 2868 if (cmp_mode == DFmode && !local_fast_math) 2869 cmov_mode = CCmode; 2870 2871 tem = gen_reg_rtx (cmp_mode); 2872 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); 2873 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); 2874} 2875 2876/* Simplify a conditional move of two constants into a setcc with 2877 arithmetic. This is done with a splitter since combine would 2878 just undo the work if done during code generation. It also catches 2879 cases we wouldn't have before cse. */ 2880 2881int 2882alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, 2883 rtx t_rtx, rtx f_rtx) 2884{ 2885 HOST_WIDE_INT t, f, diff; 2886 machine_mode mode; 2887 rtx target, subtarget, tmp; 2888 2889 mode = GET_MODE (dest); 2890 t = INTVAL (t_rtx); 2891 f = INTVAL (f_rtx); 2892 diff = t - f; 2893 2894 if (((code == NE || code == EQ) && diff < 0) 2895 || (code == GE || code == GT)) 2896 { 2897 code = reverse_condition (code); 2898 std::swap (t, f); 2899 diff = -diff; 2900 } 2901 2902 subtarget = target = dest; 2903 if (mode != DImode) 2904 { 2905 target = gen_lowpart (DImode, dest); 2906 if (can_create_pseudo_p ()) 2907 subtarget = gen_reg_rtx (DImode); 2908 else 2909 subtarget = target; 2910 } 2911 /* Below, we must be careful to use copy_rtx on target and subtarget 2912 in intermediate insns, as they may be a subreg rtx, which may not 2913 be shared. */ 2914 2915 if (f == 0 && exact_log2 (diff) > 0 2916 /* On EV6, we've got enough shifters to make non-arithmetic shifts 2917 viable over a longer latency cmove. On EV5, the E0 slot is a 2918 scarce resource, and on EV4 shift has the same latency as a cmove. */ 2919 && (diff <= 8 || alpha_tune == PROCESSOR_EV6)) 2920 { 2921 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); 2922 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); 2923 2924 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), 2925 GEN_INT (exact_log2 (t))); 2926 emit_insn (gen_rtx_SET (target, tmp)); 2927 } 2928 else if (f == 0 && t == -1) 2929 { 2930 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); 2931 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); 2932 2933 emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); 2934 } 2935 else if (diff == 1 || diff == 4 || diff == 8) 2936 { 2937 rtx add_op; 2938 2939 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); 2940 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); 2941 2942 if (diff == 1) 2943 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); 2944 else 2945 { 2946 add_op = GEN_INT (f); 2947 if (sext_add_operand (add_op, mode)) 2948 { 2949 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), 2950 GEN_INT (exact_log2 (diff))); 2951 tmp = gen_rtx_PLUS (DImode, tmp, add_op); 2952 emit_insn (gen_rtx_SET (target, tmp)); 2953 } 2954 else 2955 return 0; 2956 } 2957 } 2958 else 2959 return 0; 2960 2961 return 1; 2962} 2963 2964/* Look up the function X_floating library function name for the 2965 given operation. 
*/ 2966 2967struct GTY(()) xfloating_op 2968{ 2969 const enum rtx_code code; 2970 const char *const GTY((skip)) osf_func; 2971 const char *const GTY((skip)) vms_func; 2972 rtx libcall; 2973}; 2974 2975static GTY(()) struct xfloating_op xfloating_ops[] = 2976{ 2977 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, 2978 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, 2979 { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, 2980 { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, 2981 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, 2982 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, 2983 { LT, "_OtsLssX", "OTS$LSS_X", 0 }, 2984 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, 2985 { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, 2986 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, 2987 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, 2988 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, 2989 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, 2990 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, 2991 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } 2992}; 2993 2994static GTY(()) struct xfloating_op vax_cvt_ops[] = 2995{ 2996 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, 2997 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } 2998}; 2999 3000static rtx 3001alpha_lookup_xfloating_lib_func (enum rtx_code code) 3002{ 3003 struct xfloating_op *ops = xfloating_ops; 3004 long n = ARRAY_SIZE (xfloating_ops); 3005 long i; 3006 3007 gcc_assert (TARGET_HAS_XFLOATING_LIBS); 3008 3009 /* How irritating. Nothing to key off for the main table. */ 3010 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) 3011 { 3012 ops = vax_cvt_ops; 3013 n = ARRAY_SIZE (vax_cvt_ops); 3014 } 3015 3016 for (i = 0; i < n; ++i, ++ops) 3017 if (ops->code == code) 3018 { 3019 rtx func = ops->libcall; 3020 if (!func) 3021 { 3022 func = init_one_libfunc (TARGET_ABI_OPEN_VMS 3023 ? ops->vms_func : ops->osf_func); 3024 ops->libcall = func; 3025 } 3026 return func; 3027 } 3028 3029 gcc_unreachable (); 3030} 3031 3032/* Most X_floating operations take the rounding mode as an argument. 3033 Compute that here. */ 3034 3035static int 3036alpha_compute_xfloating_mode_arg (enum rtx_code code, 3037 enum alpha_fp_rounding_mode round) 3038{ 3039 int mode; 3040 3041 switch (round) 3042 { 3043 case ALPHA_FPRM_NORM: 3044 mode = 2; 3045 break; 3046 case ALPHA_FPRM_MINF: 3047 mode = 1; 3048 break; 3049 case ALPHA_FPRM_CHOP: 3050 mode = 0; 3051 break; 3052 case ALPHA_FPRM_DYN: 3053 mode = 4; 3054 break; 3055 default: 3056 gcc_unreachable (); 3057 3058 /* XXX For reference, round to +inf is mode = 3. */ 3059 } 3060 3061 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) 3062 mode |= 0x10000; 3063 3064 return mode; 3065} 3066 3067/* Emit an X_floating library function call. 3068 3069 Note that these functions do not follow normal calling conventions: 3070 TFmode arguments are passed in two integer registers (as opposed to 3071 indirect); TFmode return values appear in R16+R17. 3072 3073 FUNC is the function to call. 3074 TARGET is where the output belongs. 3075 OPERANDS are the inputs. 3076 NOPERANDS is the count of inputs. 3077 EQUIV is the expression equivalent for the function. 
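
   (EQUIV -- e.g. (plus:TF a b) for OTS$ADD_X -- becomes the REG_EQUAL
   note that emit_libcall_block attaches to the final copy, so later
   passes can CSE two such calls on the same inputs even though the call
   itself is opaque to them.)
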
3078*/ 3079 3080static void 3081alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], 3082 int noperands, rtx equiv) 3083{ 3084 rtx usage = NULL_RTX, reg; 3085 int regno = 16, i; 3086 3087 start_sequence (); 3088 3089 for (i = 0; i < noperands; ++i) 3090 { 3091 switch (GET_MODE (operands[i])) 3092 { 3093 case E_TFmode: 3094 reg = gen_rtx_REG (TFmode, regno); 3095 regno += 2; 3096 break; 3097 3098 case E_DFmode: 3099 reg = gen_rtx_REG (DFmode, regno + 32); 3100 regno += 1; 3101 break; 3102 3103 case E_VOIDmode: 3104 gcc_assert (CONST_INT_P (operands[i])); 3105 /* FALLTHRU */ 3106 case E_DImode: 3107 reg = gen_rtx_REG (DImode, regno); 3108 regno += 1; 3109 break; 3110 3111 default: 3112 gcc_unreachable (); 3113 } 3114 3115 emit_move_insn (reg, operands[i]); 3116 use_reg (&usage, reg); 3117 } 3118 3119 switch (GET_MODE (target)) 3120 { 3121 case E_TFmode: 3122 reg = gen_rtx_REG (TFmode, 16); 3123 break; 3124 case E_DFmode: 3125 reg = gen_rtx_REG (DFmode, 32); 3126 break; 3127 case E_DImode: 3128 reg = gen_rtx_REG (DImode, 0); 3129 break; 3130 default: 3131 gcc_unreachable (); 3132 } 3133 3134 rtx mem = gen_rtx_MEM (QImode, func); 3135 rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx, 3136 const0_rtx, const0_rtx)); 3137 CALL_INSN_FUNCTION_USAGE (tmp) = usage; 3138 RTL_CONST_CALL_P (tmp) = 1; 3139 3140 tmp = get_insns (); 3141 end_sequence (); 3142 3143 emit_libcall_block (tmp, target, reg, equiv); 3144} 3145 3146/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ 3147 3148void 3149alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[]) 3150{ 3151 rtx func; 3152 int mode; 3153 rtx out_operands[3]; 3154 3155 func = alpha_lookup_xfloating_lib_func (code); 3156 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); 3157 3158 out_operands[0] = operands[1]; 3159 out_operands[1] = operands[2]; 3160 out_operands[2] = GEN_INT (mode); 3161 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3, 3162 gen_rtx_fmt_ee (code, TFmode, operands[1], 3163 operands[2])); 3164} 3165 3166/* Emit an X_floating library function call for a comparison. */ 3167 3168static rtx 3169alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) 3170{ 3171 enum rtx_code cmp_code, res_code; 3172 rtx func, out, operands[2], note; 3173 3174 /* X_floating library comparison functions return 3175 -1 unordered 3176 0 false 3177 1 true 3178 Convert the compare against the raw return value. */ 3179 3180 cmp_code = *pcode; 3181 switch (cmp_code) 3182 { 3183 case UNORDERED: 3184 cmp_code = EQ; 3185 res_code = LT; 3186 break; 3187 case ORDERED: 3188 cmp_code = EQ; 3189 res_code = GE; 3190 break; 3191 case NE: 3192 res_code = NE; 3193 break; 3194 case EQ: 3195 case LT: 3196 case GT: 3197 case LE: 3198 case GE: 3199 res_code = GT; 3200 break; 3201 default: 3202 gcc_unreachable (); 3203 } 3204 *pcode = res_code; 3205 3206 func = alpha_lookup_xfloating_lib_func (cmp_code); 3207 3208 operands[0] = op0; 3209 operands[1] = op1; 3210 out = gen_reg_rtx (DImode); 3211 3212 /* What's actually returned is -1,0,1, not a proper boolean value. */ 3213 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); 3214 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); 3215 alpha_emit_xfloating_libcall (func, out, operands, 2, note); 3216 3217 return out; 3218} 3219 3220/* Emit an X_floating library function call for a conversion. 
*/ 3221 3222void 3223alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) 3224{ 3225 int noperands = 1, mode; 3226 rtx out_operands[2]; 3227 rtx func; 3228 enum rtx_code code = orig_code; 3229 3230 if (code == UNSIGNED_FIX) 3231 code = FIX; 3232 3233 func = alpha_lookup_xfloating_lib_func (code); 3234 3235 out_operands[0] = operands[1]; 3236 3237 switch (code) 3238 { 3239 case FIX: 3240 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP); 3241 out_operands[1] = GEN_INT (mode); 3242 noperands = 2; 3243 break; 3244 case FLOAT_TRUNCATE: 3245 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); 3246 out_operands[1] = GEN_INT (mode); 3247 noperands = 2; 3248 break; 3249 default: 3250 break; 3251 } 3252 3253 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands, 3254 gen_rtx_fmt_e (orig_code, 3255 GET_MODE (operands[0]), 3256 operands[1])); 3257} 3258 3259/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of 3260 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, 3261 guarantee that the sequence 3262 set (OP[0] OP[2]) 3263 set (OP[1] OP[3]) 3264 is valid. Naturally, output operand ordering is little-endian. 3265 This is used by *movtf_internal and *movti_internal. */ 3266 3267void 3268alpha_split_tmode_pair (rtx operands[4], machine_mode mode, 3269 bool fixup_overlap) 3270{ 3271 switch (GET_CODE (operands[1])) 3272 { 3273 case REG: 3274 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); 3275 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); 3276 break; 3277 3278 case MEM: 3279 operands[3] = adjust_address (operands[1], DImode, 8); 3280 operands[2] = adjust_address (operands[1], DImode, 0); 3281 break; 3282 3283 CASE_CONST_SCALAR_INT: 3284 case CONST_DOUBLE: 3285 gcc_assert (operands[1] == CONST0_RTX (mode)); 3286 operands[2] = operands[3] = const0_rtx; 3287 break; 3288 3289 default: 3290 gcc_unreachable (); 3291 } 3292 3293 switch (GET_CODE (operands[0])) 3294 { 3295 case REG: 3296 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); 3297 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); 3298 break; 3299 3300 case MEM: 3301 operands[1] = adjust_address (operands[0], DImode, 8); 3302 operands[0] = adjust_address (operands[0], DImode, 0); 3303 break; 3304 3305 default: 3306 gcc_unreachable (); 3307 } 3308 3309 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) 3310 { 3311 std::swap (operands[0], operands[1]); 3312 std::swap (operands[2], operands[3]); 3313 } 3314} 3315 3316/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, 3317 op2 is a register containing the sign bit, operation is the 3318 logical operation to be performed. */ 3319 3320void 3321alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) 3322{ 3323 rtx high_bit = operands[2]; 3324 rtx scratch; 3325 int move; 3326 3327 alpha_split_tmode_pair (operands, TFmode, false); 3328 3329 /* Detect three flavors of operand overlap. */ 3330 move = 1; 3331 if (rtx_equal_p (operands[0], operands[2])) 3332 move = 0; 3333 else if (rtx_equal_p (operands[1], operands[2])) 3334 { 3335 if (rtx_equal_p (operands[0], high_bit)) 3336 move = 2; 3337 else 3338 move = -1; 3339 } 3340 3341 if (move < 0) 3342 emit_move_insn (operands[0], operands[2]); 3343 3344 /* ??? If the destination overlaps both source tf and high_bit, then 3345 assume source tf is dead in its entirety and use the other half 3346 for a scratch register. Otherwise "scratch" is just the proper 3347 destination register. 
*/
3348  scratch = operands[move < 2 ? 1 : 3];
3349
3350  emit_insn ((*operation) (scratch, high_bit, operands[3]));
3351
3352  if (move > 0)
3353    {
3354      emit_move_insn (operands[0], operands[2]);
3355      if (move > 1)
3356        emit_move_insn (operands[1], scratch);
3357    }
3358 }
3359
3360/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3361   unaligned data:
3362
3363           unsigned:                   signed:
3364   word:   ldq_u  r1,X(r11)            ldq_u  r1,X(r11)
3365           ldq_u  r2,X+1(r11)          ldq_u  r2,X+1(r11)
3366           lda    r3,X(r11)            lda    r3,X+2(r11)
3367           extwl  r1,r3,r1             extql  r1,r3,r1
3368           extwh  r2,r3,r2             extqh  r2,r3,r2
3369           or     r1,r2,r1             or     r1,r2,r1
3370                                       sra    r1,48,r1
3371
3372   long:   ldq_u  r1,X(r11)            ldq_u  r1,X(r11)
3373           ldq_u  r2,X+3(r11)          ldq_u  r2,X+3(r11)
3374           lda    r3,X(r11)            lda    r3,X(r11)
3375           extll  r1,r3,r1             extll  r1,r3,r1
3376           extlh  r2,r3,r2             extlh  r2,r3,r2
3377           or     r1,r2,r1             addl   r1,r2,r1
3378
3379   quad:   ldq_u  r1,X(r11)
3380           ldq_u  r2,X+7(r11)
3381           lda    r3,X(r11)
3382           extql  r1,r3,r1
3383           extqh  r2,r3,r2
3384           or     r1,r2,r1
3385*/
3386
3387 void
3388 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3389                              HOST_WIDE_INT ofs, int sign)
3390 {
3391  rtx meml, memh, addr, extl, exth, tmp, mema;
3392  machine_mode mode;
3393
3394  if (TARGET_BWX && size == 2)
3395    {
3396      meml = adjust_address (mem, QImode, ofs);
3397      memh = adjust_address (mem, QImode, ofs+1);
3398      extl = gen_reg_rtx (DImode);
3399      exth = gen_reg_rtx (DImode);
3400      emit_insn (gen_zero_extendqidi2 (extl, meml));
3401      emit_insn (gen_zero_extendqidi2 (exth, memh));
3402      exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3403                                  NULL, 1, OPTAB_LIB_WIDEN);
3404      addr = expand_simple_binop (DImode, IOR, extl, exth,
3405                                  NULL, 1, OPTAB_LIB_WIDEN);
3406
3407      if (sign && GET_MODE (tgt) != HImode)
3408        {
3409          addr = gen_lowpart (HImode, addr);
3410          emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3411        }
3412      else
3413        {
3414          if (GET_MODE (tgt) != DImode)
3415            addr = gen_lowpart (GET_MODE (tgt), addr);
3416          emit_move_insn (tgt, addr);
3417        }
3418      return;
3419    }
3420
3421  meml = gen_reg_rtx (DImode);
3422  memh = gen_reg_rtx (DImode);
3423  addr = gen_reg_rtx (DImode);
3424  extl = gen_reg_rtx (DImode);
3425  exth = gen_reg_rtx (DImode);
3426
3427  mema = XEXP (mem, 0);
3428  if (GET_CODE (mema) == LO_SUM)
3429    mema = force_reg (Pmode, mema);
3430
3431  /* AND addresses cannot be in any alias set, since they may implicitly
3432     alias surrounding code.  Ideally we'd have some alias set that
3433     covered all types except those with alignment 8 or higher.  */
3434
3435  tmp = change_address (mem, DImode,
3436                        gen_rtx_AND (DImode,
3437                                     plus_constant (DImode, mema, ofs),
3438                                     GEN_INT (-8)));
3439  set_mem_alias_set (tmp, 0);
3440  emit_move_insn (meml, tmp);
3441
3442  tmp = change_address (mem, DImode,
3443                        gen_rtx_AND (DImode,
3444                                     plus_constant (DImode, mema,
3445                                                    ofs + size - 1),
3446                                     GEN_INT (-8)));
3447  set_mem_alias_set (tmp, 0);
3448  emit_move_insn (memh, tmp);
3449
3450  if (sign && size == 2)
3451    {
3452      emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3453
3454      emit_insn (gen_extql (extl, meml, addr));
3455      emit_insn (gen_extqh (exth, memh, addr));
3456
3457      /* We must use tgt here for the target.  Alpha-vms port fails if we use
3458         addr for the target, because addr is marked as a pointer and combine
3459         knows that pointers are always sign-extended 32-bit values.
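
         As a C-level model of what the extract insns compute (for
         illustration only, not code used by the port), an unaligned
         SIZE-byte load at address p is assembled from its two bounding
         quadwords roughly as

           s   = 8 * (p & 7);
           lo  = (quadword at (p & ~7))              >> s;
           hi  = (quadword at ((p + SIZE - 1) & ~7)) << (64 - s);
           val = lo | hi;

         except that ext*h shifts by zero when s == 0 instead of
         producing zero, which is harmless here because both loads then
         name the same quadword.
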
*/ 3460 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); 3461 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48), 3462 addr, 1, OPTAB_WIDEN); 3463 } 3464 else 3465 { 3466 emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); 3467 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr)); 3468 switch ((int) size) 3469 { 3470 case 2: 3471 emit_insn (gen_extwh (exth, memh, addr)); 3472 mode = HImode; 3473 break; 3474 case 4: 3475 emit_insn (gen_extlh (exth, memh, addr)); 3476 mode = SImode; 3477 break; 3478 case 8: 3479 emit_insn (gen_extqh (exth, memh, addr)); 3480 mode = DImode; 3481 break; 3482 default: 3483 gcc_unreachable (); 3484 } 3485 3486 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), 3487 gen_lowpart (mode, exth), gen_lowpart (mode, tgt), 3488 sign, OPTAB_WIDEN); 3489 } 3490 3491 if (addr != tgt) 3492 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); 3493} 3494 3495/* Similarly, use ins and msk instructions to perform unaligned stores. */ 3496 3497void 3498alpha_expand_unaligned_store (rtx dst, rtx src, 3499 HOST_WIDE_INT size, HOST_WIDE_INT ofs) 3500{ 3501 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; 3502 3503 if (TARGET_BWX && size == 2) 3504 { 3505 if (src != const0_rtx) 3506 { 3507 dstl = gen_lowpart (QImode, src); 3508 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), 3509 NULL, 1, OPTAB_LIB_WIDEN); 3510 dsth = gen_lowpart (QImode, dsth); 3511 } 3512 else 3513 dstl = dsth = const0_rtx; 3514 3515 meml = adjust_address (dst, QImode, ofs); 3516 memh = adjust_address (dst, QImode, ofs+1); 3517 3518 emit_move_insn (meml, dstl); 3519 emit_move_insn (memh, dsth); 3520 return; 3521 } 3522 3523 dstl = gen_reg_rtx (DImode); 3524 dsth = gen_reg_rtx (DImode); 3525 insl = gen_reg_rtx (DImode); 3526 insh = gen_reg_rtx (DImode); 3527 3528 dsta = XEXP (dst, 0); 3529 if (GET_CODE (dsta) == LO_SUM) 3530 dsta = force_reg (Pmode, dsta); 3531 3532 /* AND addresses cannot be in any alias set, since they may implicitly 3533 alias surrounding code. Ideally we'd have some alias set that 3534 covered all types except those with alignment 8 or higher. 
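
     (Schematically the store below is a double read-modify-write:
     fetch both bounding quadwords, punch a SIZE-byte hole in them with
     msk*l/msk*h, position the new data with ins*l/ins*h, OR the halves
     together, and store the high word before the low one -- in the
     degenerate fully-aligned case both halves name the same quadword,
     and the low store must be the one that lands last.)
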
*/ 3535 3536 meml = change_address (dst, DImode, 3537 gen_rtx_AND (DImode, 3538 plus_constant (DImode, dsta, ofs), 3539 GEN_INT (-8))); 3540 set_mem_alias_set (meml, 0); 3541 3542 memh = change_address (dst, DImode, 3543 gen_rtx_AND (DImode, 3544 plus_constant (DImode, dsta, 3545 ofs + size - 1), 3546 GEN_INT (-8))); 3547 set_mem_alias_set (memh, 0); 3548 3549 emit_move_insn (dsth, memh); 3550 emit_move_insn (dstl, meml); 3551 3552 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); 3553 3554 if (src != CONST0_RTX (GET_MODE (src))) 3555 { 3556 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), 3557 GEN_INT (size*8), addr)); 3558 3559 switch ((int) size) 3560 { 3561 case 2: 3562 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); 3563 break; 3564 case 4: 3565 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); 3566 break; 3567 case 8: 3568 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); 3569 break; 3570 default: 3571 gcc_unreachable (); 3572 } 3573 } 3574 3575 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr)); 3576 3577 switch ((int) size) 3578 { 3579 case 2: 3580 emit_insn (gen_mskwl (dstl, dstl, addr)); 3581 break; 3582 case 4: 3583 emit_insn (gen_mskll (dstl, dstl, addr)); 3584 break; 3585 case 8: 3586 emit_insn (gen_mskql (dstl, dstl, addr)); 3587 break; 3588 default: 3589 gcc_unreachable (); 3590 } 3591 3592 if (src != CONST0_RTX (GET_MODE (src))) 3593 { 3594 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); 3595 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); 3596 } 3597 3598 /* Must store high before low for degenerate case of aligned. */ 3599 emit_move_insn (memh, dsth); 3600 emit_move_insn (meml, dstl); 3601} 3602 3603/* The block move code tries to maximize speed by separating loads and 3604 stores at the expense of register pressure: we load all of the data 3605 before we store it back out. There are two secondary effects worth 3606 mentioning, that this speeds copying to/from aligned and unaligned 3607 buffers, and that it makes the code significantly easier to write. */ 3608 3609#define MAX_MOVE_WORDS 8 3610 3611/* Load an integral number of consecutive unaligned quadwords. */ 3612 3613static void 3614alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem, 3615 HOST_WIDE_INT words, HOST_WIDE_INT ofs) 3616{ 3617 rtx const im8 = GEN_INT (-8); 3618 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1]; 3619 rtx sreg, areg, tmp, smema; 3620 HOST_WIDE_INT i; 3621 3622 smema = XEXP (smem, 0); 3623 if (GET_CODE (smema) == LO_SUM) 3624 smema = force_reg (Pmode, smema); 3625 3626 /* Generate all the tmp registers we need. */ 3627 for (i = 0; i < words; ++i) 3628 { 3629 data_regs[i] = out_regs[i]; 3630 ext_tmps[i] = gen_reg_rtx (DImode); 3631 } 3632 data_regs[words] = gen_reg_rtx (DImode); 3633 3634 if (ofs != 0) 3635 smem = adjust_address (smem, GET_MODE (smem), ofs); 3636 3637 /* Load up all of the source data. */ 3638 for (i = 0; i < words; ++i) 3639 { 3640 tmp = change_address (smem, DImode, 3641 gen_rtx_AND (DImode, 3642 plus_constant (DImode, smema, 8*i), 3643 im8)); 3644 set_mem_alias_set (tmp, 0); 3645 emit_move_insn (data_regs[i], tmp); 3646 } 3647 3648 tmp = change_address (smem, DImode, 3649 gen_rtx_AND (DImode, 3650 plus_constant (DImode, smema, 3651 8*words - 1), 3652 im8)); 3653 set_mem_alias_set (tmp, 0); 3654 emit_move_insn (data_regs[words], tmp); 3655 3656 /* Extract the half-word fragments. 
Unfortunately DEC decided to make 3657 extxh with offset zero a noop instead of zeroing the register, so 3658 we must take care of that edge condition ourselves with cmov. */ 3659 3660 sreg = copy_addr_to_reg (smema); 3661 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, 3662 1, OPTAB_WIDEN); 3663 for (i = 0; i < words; ++i) 3664 { 3665 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); 3666 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg)); 3667 emit_insn (gen_rtx_SET (ext_tmps[i], 3668 gen_rtx_IF_THEN_ELSE (DImode, 3669 gen_rtx_EQ (DImode, areg, 3670 const0_rtx), 3671 const0_rtx, ext_tmps[i]))); 3672 } 3673 3674 /* Merge the half-words into whole words. */ 3675 for (i = 0; i < words; ++i) 3676 { 3677 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], 3678 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN); 3679 } 3680} 3681 3682/* Store an integral number of consecutive unaligned quadwords. DATA_REGS 3683 may be NULL to store zeros. */ 3684 3685static void 3686alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, 3687 HOST_WIDE_INT words, HOST_WIDE_INT ofs) 3688{ 3689 rtx const im8 = GEN_INT (-8); 3690 rtx ins_tmps[MAX_MOVE_WORDS]; 3691 rtx st_tmp_1, st_tmp_2, dreg; 3692 rtx st_addr_1, st_addr_2, dmema; 3693 HOST_WIDE_INT i; 3694 3695 dmema = XEXP (dmem, 0); 3696 if (GET_CODE (dmema) == LO_SUM) 3697 dmema = force_reg (Pmode, dmema); 3698 3699 /* Generate all the tmp registers we need. */ 3700 if (data_regs != NULL) 3701 for (i = 0; i < words; ++i) 3702 ins_tmps[i] = gen_reg_rtx(DImode); 3703 st_tmp_1 = gen_reg_rtx(DImode); 3704 st_tmp_2 = gen_reg_rtx(DImode); 3705 3706 if (ofs != 0) 3707 dmem = adjust_address (dmem, GET_MODE (dmem), ofs); 3708 3709 st_addr_2 = change_address (dmem, DImode, 3710 gen_rtx_AND (DImode, 3711 plus_constant (DImode, dmema, 3712 words*8 - 1), 3713 im8)); 3714 set_mem_alias_set (st_addr_2, 0); 3715 3716 st_addr_1 = change_address (dmem, DImode, 3717 gen_rtx_AND (DImode, dmema, im8)); 3718 set_mem_alias_set (st_addr_1, 0); 3719 3720 /* Load up the destination end bits. */ 3721 emit_move_insn (st_tmp_2, st_addr_2); 3722 emit_move_insn (st_tmp_1, st_addr_1); 3723 3724 /* Shift the input data into place. */ 3725 dreg = copy_addr_to_reg (dmema); 3726 if (data_regs != NULL) 3727 { 3728 for (i = words-1; i >= 0; --i) 3729 { 3730 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); 3731 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); 3732 } 3733 for (i = words-1; i > 0; --i) 3734 { 3735 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i], 3736 ins_tmps[i-1], ins_tmps[i-1], 1, 3737 OPTAB_WIDEN); 3738 } 3739 } 3740 3741 /* Split and merge the ends with the destination data. */ 3742 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); 3743 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); 3744 3745 if (data_regs != NULL) 3746 { 3747 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1], 3748 st_tmp_2, 1, OPTAB_WIDEN); 3749 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], 3750 st_tmp_1, 1, OPTAB_WIDEN); 3751 } 3752 3753 /* Store it all. */ 3754 emit_move_insn (st_addr_2, st_tmp_2); 3755 for (i = words-1; i > 0; --i) 3756 { 3757 rtx tmp = change_address (dmem, DImode, 3758 gen_rtx_AND (DImode, 3759 plus_constant (DImode, 3760 dmema, i*8), 3761 im8)); 3762 set_mem_alias_set (tmp, 0); 3763 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx); 3764 } 3765 emit_move_insn (st_addr_1, st_tmp_1); 3766} 3767 3768 3769/* Expand string/block move operations. 
3770 3771 operands[0] is the pointer to the destination. 3772 operands[1] is the pointer to the source. 3773 operands[2] is the number of bytes to move. 3774 operands[3] is the alignment. */ 3775 3776int 3777alpha_expand_block_move (rtx operands[]) 3778{ 3779 rtx bytes_rtx = operands[2]; 3780 rtx align_rtx = operands[3]; 3781 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); 3782 HOST_WIDE_INT bytes = orig_bytes; 3783 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; 3784 HOST_WIDE_INT dst_align = src_align; 3785 rtx orig_src = operands[1]; 3786 rtx orig_dst = operands[0]; 3787 rtx data_regs[2 * MAX_MOVE_WORDS + 16]; 3788 rtx tmp; 3789 unsigned int i, words, ofs, nregs = 0; 3790 3791 if (orig_bytes <= 0) 3792 return 1; 3793 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) 3794 return 0; 3795 3796 /* Look for additional alignment information from recorded register info. */ 3797 3798 tmp = XEXP (orig_src, 0); 3799 if (REG_P (tmp)) 3800 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3801 else if (GET_CODE (tmp) == PLUS 3802 && REG_P (XEXP (tmp, 0)) 3803 && CONST_INT_P (XEXP (tmp, 1))) 3804 { 3805 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3806 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3807 3808 if (a > src_align) 3809 { 3810 if (a >= 64 && c % 8 == 0) 3811 src_align = 64; 3812 else if (a >= 32 && c % 4 == 0) 3813 src_align = 32; 3814 else if (a >= 16 && c % 2 == 0) 3815 src_align = 16; 3816 } 3817 } 3818 3819 tmp = XEXP (orig_dst, 0); 3820 if (REG_P (tmp)) 3821 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3822 else if (GET_CODE (tmp) == PLUS 3823 && REG_P (XEXP (tmp, 0)) 3824 && CONST_INT_P (XEXP (tmp, 1))) 3825 { 3826 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3827 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3828 3829 if (a > dst_align) 3830 { 3831 if (a >= 64 && c % 8 == 0) 3832 dst_align = 64; 3833 else if (a >= 32 && c % 4 == 0) 3834 dst_align = 32; 3835 else if (a >= 16 && c % 2 == 0) 3836 dst_align = 16; 3837 } 3838 } 3839 3840 ofs = 0; 3841 if (src_align >= 64 && bytes >= 8) 3842 { 3843 words = bytes / 8; 3844 3845 for (i = 0; i < words; ++i) 3846 data_regs[nregs + i] = gen_reg_rtx (DImode); 3847 3848 for (i = 0; i < words; ++i) 3849 emit_move_insn (data_regs[nregs + i], 3850 adjust_address (orig_src, DImode, ofs + i * 8)); 3851 3852 nregs += words; 3853 bytes -= words * 8; 3854 ofs += words * 8; 3855 } 3856 3857 if (src_align >= 32 && bytes >= 4) 3858 { 3859 words = bytes / 4; 3860 3861 for (i = 0; i < words; ++i) 3862 data_regs[nregs + i] = gen_reg_rtx (SImode); 3863 3864 for (i = 0; i < words; ++i) 3865 emit_move_insn (data_regs[nregs + i], 3866 adjust_address (orig_src, SImode, ofs + i * 4)); 3867 3868 nregs += words; 3869 bytes -= words * 4; 3870 ofs += words * 4; 3871 } 3872 3873 if (bytes >= 8) 3874 { 3875 words = bytes / 8; 3876 3877 for (i = 0; i < words+1; ++i) 3878 data_regs[nregs + i] = gen_reg_rtx (DImode); 3879 3880 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src, 3881 words, ofs); 3882 3883 nregs += words; 3884 bytes -= words * 8; 3885 ofs += words * 8; 3886 } 3887 3888 if (! 
TARGET_BWX && bytes >= 4) 3889 { 3890 data_regs[nregs++] = tmp = gen_reg_rtx (SImode); 3891 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); 3892 bytes -= 4; 3893 ofs += 4; 3894 } 3895 3896 if (bytes >= 2) 3897 { 3898 if (src_align >= 16) 3899 { 3900 do { 3901 data_regs[nregs++] = tmp = gen_reg_rtx (HImode); 3902 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); 3903 bytes -= 2; 3904 ofs += 2; 3905 } while (bytes >= 2); 3906 } 3907 else if (! TARGET_BWX) 3908 { 3909 data_regs[nregs++] = tmp = gen_reg_rtx (HImode); 3910 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); 3911 bytes -= 2; 3912 ofs += 2; 3913 } 3914 } 3915 3916 while (bytes > 0) 3917 { 3918 data_regs[nregs++] = tmp = gen_reg_rtx (QImode); 3919 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); 3920 bytes -= 1; 3921 ofs += 1; 3922 } 3923 3924 gcc_assert (nregs <= ARRAY_SIZE (data_regs)); 3925 3926 /* Now save it back out again. */ 3927 3928 i = 0, ofs = 0; 3929 3930 /* Write out the data in whatever chunks reading the source allowed. */ 3931 if (dst_align >= 64) 3932 { 3933 while (i < nregs && GET_MODE (data_regs[i]) == DImode) 3934 { 3935 emit_move_insn (adjust_address (orig_dst, DImode, ofs), 3936 data_regs[i]); 3937 ofs += 8; 3938 i++; 3939 } 3940 } 3941 3942 if (dst_align >= 32) 3943 { 3944 /* If the source has remaining DImode regs, write them out in 3945 two pieces. */ 3946 while (i < nregs && GET_MODE (data_regs[i]) == DImode) 3947 { 3948 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), 3949 NULL_RTX, 1, OPTAB_WIDEN); 3950 3951 emit_move_insn (adjust_address (orig_dst, SImode, ofs), 3952 gen_lowpart (SImode, data_regs[i])); 3953 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), 3954 gen_lowpart (SImode, tmp)); 3955 ofs += 8; 3956 i++; 3957 } 3958 3959 while (i < nregs && GET_MODE (data_regs[i]) == SImode) 3960 { 3961 emit_move_insn (adjust_address (orig_dst, SImode, ofs), 3962 data_regs[i]); 3963 ofs += 4; 3964 i++; 3965 } 3966 } 3967 3968 if (i < nregs && GET_MODE (data_regs[i]) == DImode) 3969 { 3970 /* Write out a remaining block of words using unaligned methods. */ 3971 3972 for (words = 1; i + words < nregs; words++) 3973 if (GET_MODE (data_regs[i + words]) != DImode) 3974 break; 3975 3976 if (words == 1) 3977 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); 3978 else 3979 alpha_expand_unaligned_store_words (data_regs + i, orig_dst, 3980 words, ofs); 3981 3982 i += words; 3983 ofs += words * 8; 3984 } 3985 3986 /* Due to the above, this won't be aligned. */ 3987 /* ??? If we have more than one of these, consider constructing full 3988 words in registers and using alpha_expand_unaligned_store_words. */ 3989 while (i < nregs && GET_MODE (data_regs[i]) == SImode) 3990 { 3991 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); 3992 ofs += 4; 3993 i++; 3994 } 3995 3996 if (dst_align >= 16) 3997 while (i < nregs && GET_MODE (data_regs[i]) == HImode) 3998 { 3999 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); 4000 i++; 4001 ofs += 2; 4002 } 4003 else 4004 while (i < nregs && GET_MODE (data_regs[i]) == HImode) 4005 { 4006 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); 4007 i++; 4008 ofs += 2; 4009 } 4010 4011 /* The remainder must be byte copies. 
*/ 4012 while (i < nregs) 4013 { 4014 gcc_assert (GET_MODE (data_regs[i]) == QImode); 4015 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); 4016 i++; 4017 ofs += 1; 4018 } 4019 4020 return 1; 4021} 4022 4023int 4024alpha_expand_block_clear (rtx operands[]) 4025{ 4026 rtx bytes_rtx = operands[1]; 4027 rtx align_rtx = operands[3]; 4028 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); 4029 HOST_WIDE_INT bytes = orig_bytes; 4030 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; 4031 HOST_WIDE_INT alignofs = 0; 4032 rtx orig_dst = operands[0]; 4033 rtx tmp; 4034 int i, words, ofs = 0; 4035 4036 if (orig_bytes <= 0) 4037 return 1; 4038 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) 4039 return 0; 4040 4041 /* Look for stricter alignment. */ 4042 tmp = XEXP (orig_dst, 0); 4043 if (REG_P (tmp)) 4044 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); 4045 else if (GET_CODE (tmp) == PLUS 4046 && REG_P (XEXP (tmp, 0)) 4047 && CONST_INT_P (XEXP (tmp, 1))) 4048 { 4049 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 4050 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 4051 4052 if (a > align) 4053 { 4054 if (a >= 64) 4055 align = a, alignofs = 8 - c % 8; 4056 else if (a >= 32) 4057 align = a, alignofs = 4 - c % 4; 4058 else if (a >= 16) 4059 align = a, alignofs = 2 - c % 2; 4060 } 4061 } 4062 4063 /* Handle an unaligned prefix first. */ 4064 4065 if (alignofs > 0) 4066 { 4067 /* Given that alignofs is bounded by align, the only time BWX could 4068 generate three stores is for a 7 byte fill. Prefer two individual 4069 stores over a load/mask/store sequence. */ 4070 if ((!TARGET_BWX || alignofs == 7) 4071 && align >= 32 4072 && !(alignofs == 4 && bytes >= 4)) 4073 { 4074 machine_mode mode = (align >= 64 ? DImode : SImode); 4075 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs; 4076 rtx mem, tmp; 4077 HOST_WIDE_INT mask; 4078 4079 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); 4080 set_mem_alias_set (mem, 0); 4081 4082 mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8)); 4083 if (bytes < alignofs) 4084 { 4085 mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8); 4086 ofs += bytes; 4087 bytes = 0; 4088 } 4089 else 4090 { 4091 bytes -= alignofs; 4092 ofs += alignofs; 4093 } 4094 alignofs = 0; 4095 4096 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), 4097 NULL_RTX, 1, OPTAB_WIDEN); 4098 4099 emit_move_insn (mem, tmp); 4100 } 4101 4102 if (TARGET_BWX && (alignofs & 1) && bytes >= 1) 4103 { 4104 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); 4105 bytes -= 1; 4106 ofs += 1; 4107 alignofs -= 1; 4108 } 4109 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) 4110 { 4111 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); 4112 bytes -= 2; 4113 ofs += 2; 4114 alignofs -= 2; 4115 } 4116 if (alignofs == 4 && bytes >= 4) 4117 { 4118 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); 4119 bytes -= 4; 4120 ofs += 4; 4121 alignofs = 0; 4122 } 4123 4124 /* If we've not used the extra lead alignment information by now, 4125 we won't be able to. Downgrade align to match what's left over. */ 4126 if (alignofs > 0) 4127 { 4128 alignofs = alignofs & -alignofs; 4129 align = MIN (align, alignofs * BITS_PER_UNIT); 4130 } 4131 } 4132 4133 /* Handle a block of contiguous long-words. 
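     With the destination known to be 64-bit aligned, each long-word is
     cleared with a plain aligned store of zero (an stq of $31).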
*/ 4134 4135 if (align >= 64 && bytes >= 8) 4136 { 4137 words = bytes / 8; 4138 4139 for (i = 0; i < words; ++i) 4140 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), 4141 const0_rtx); 4142 4143 bytes -= words * 8; 4144 ofs += words * 8; 4145 } 4146 4147 /* If the block is large and appropriately aligned, emit a single 4148 store followed by a sequence of stq_u insns. */ 4149 4150 if (align >= 32 && bytes > 16) 4151 { 4152 rtx orig_dsta; 4153 4154 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); 4155 bytes -= 4; 4156 ofs += 4; 4157 4158 orig_dsta = XEXP (orig_dst, 0); 4159 if (GET_CODE (orig_dsta) == LO_SUM) 4160 orig_dsta = force_reg (Pmode, orig_dsta); 4161 4162 words = bytes / 8; 4163 for (i = 0; i < words; ++i) 4164 { 4165 rtx mem 4166 = change_address (orig_dst, DImode, 4167 gen_rtx_AND (DImode, 4168 plus_constant (DImode, orig_dsta, 4169 ofs + i*8), 4170 GEN_INT (-8))); 4171 set_mem_alias_set (mem, 0); 4172 emit_move_insn (mem, const0_rtx); 4173 } 4174 4175 /* Depending on the alignment, the first stq_u may have overlapped 4176 with the initial stl, which means that the last stq_u didn't 4177 write as much as it would appear. Leave those questionable bytes 4178 unaccounted for. */ 4179 bytes -= words * 8 - 4; 4180 ofs += words * 8 - 4; 4181 } 4182 4183 /* Handle a smaller block of aligned words. */ 4184 4185 if ((align >= 64 && bytes == 4) 4186 || (align == 32 && bytes >= 4)) 4187 { 4188 words = bytes / 4; 4189 4190 for (i = 0; i < words; ++i) 4191 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), 4192 const0_rtx); 4193 4194 bytes -= words * 4; 4195 ofs += words * 4; 4196 } 4197 4198 /* An unaligned block uses stq_u stores for as many as possible. */ 4199 4200 if (bytes >= 8) 4201 { 4202 words = bytes / 8; 4203 4204 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); 4205 4206 bytes -= words * 8; 4207 ofs += words * 8; 4208 } 4209 4210 /* Next clean up any trailing pieces. */ 4211 4212 /* Count the number of bits in BYTES for which aligned stores could 4213 be emitted. */ 4214 words = 0; 4215 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1) 4216 if (bytes & i) 4217 words += 1; 4218 4219 /* If we have appropriate alignment (and it wouldn't take too many 4220 instructions otherwise), mask out the bytes we need. */ 4221 if (TARGET_BWX ? words > 2 : bytes > 0) 4222 { 4223 if (align >= 64) 4224 { 4225 rtx mem, tmp; 4226 HOST_WIDE_INT mask; 4227 4228 mem = adjust_address (orig_dst, DImode, ofs); 4229 set_mem_alias_set (mem, 0); 4230 4231 mask = HOST_WIDE_INT_M1U << (bytes * 8); 4232 4233 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), 4234 NULL_RTX, 1, OPTAB_WIDEN); 4235 4236 emit_move_insn (mem, tmp); 4237 return 1; 4238 } 4239 else if (align >= 32 && bytes < 4) 4240 { 4241 rtx mem, tmp; 4242 HOST_WIDE_INT mask; 4243 4244 mem = adjust_address (orig_dst, SImode, ofs); 4245 set_mem_alias_set (mem, 0); 4246 4247 mask = HOST_WIDE_INT_M1U << (bytes * 8); 4248 4249 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), 4250 NULL_RTX, 1, OPTAB_WIDEN); 4251 4252 emit_move_insn (mem, tmp); 4253 return 1; 4254 } 4255 } 4256 4257 if (!TARGET_BWX && bytes >= 4) 4258 { 4259 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); 4260 bytes -= 4; 4261 ofs += 4; 4262 } 4263 4264 if (bytes >= 2) 4265 { 4266 if (align >= 16) 4267 { 4268 do { 4269 emit_move_insn (adjust_address (orig_dst, HImode, ofs), 4270 const0_rtx); 4271 bytes -= 2; 4272 ofs += 2; 4273 } while (bytes >= 2); 4274 } 4275 else if (! 
TARGET_BWX)
	{
	  alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
	  bytes -= 2;
	  ofs += 2;
	}
    }

  while (bytes > 0)
    {
      emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
      bytes -= 1;
      ofs += 1;
    }

  return 1;
}

/* Returns a mask so that zap(x, value) == x & mask.  */

rtx
alpha_expand_zap_mask (HOST_WIDE_INT value)
{
  rtx result;
  int i;
  HOST_WIDE_INT mask = 0;

  for (i = 7; i >= 0; --i)
    {
      mask <<= 8;
      if (!((value >> i) & 1))
	mask |= 0xff;
    }

  result = gen_int_mode (mask, DImode);
  return result;
}

void
alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
				   machine_mode mode,
				   rtx op0, rtx op1, rtx op2)
{
  op0 = gen_lowpart (mode, op0);

  if (op1 == const0_rtx)
    op1 = CONST0_RTX (mode);
  else
    op1 = gen_lowpart (mode, op1);

  if (op2 == const0_rtx)
    op2 = CONST0_RTX (mode);
  else
    op2 = gen_lowpart (mode, op2);

  emit_insn ((*gen) (op0, op1, op2));
}

/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
}

/* Subroutines of the atomic operation splitters.  Emit barriers
   as needed for the memory MODEL.  */

static void
alpha_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
alpha_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* A subroutine of the atomic operation splitters.  Emit an insxl
   instruction in MODE.  */

static rtx
emit_insxl (machine_mode mode, rtx op1, rtx op2)
{
  rtx ret = gen_reg_rtx (DImode);
  rtx (*fn) (rtx, rtx, rtx);

  switch (mode)
    {
    case E_QImode:
      fn = gen_insbl;
      break;
    case E_HImode:
      fn = gen_inswl;
      break;
    case E_SImode:
      fn = gen_insll;
      break;
    case E_DImode:
      fn = gen_insql;
      break;
    default:
      gcc_unreachable ();
    }

  op1 = force_reg (mode, op1);
  emit_insn (fn (ret, op1, op2));

  return ret;
}

/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations
   to return the value of MEM either before or after the operation.  SCRATCH
   is a scratch register.
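   For illustration, the emitted loop has roughly this shape (DImode case;
   SImode uses ldl_l/stl_c):

	retry:	ldq_l	BEFORE,0(MEM)
		<op>	BEFORE,VAL,SCRATCH
		stq_c	SCRATCH,0(MEM)
		beq	SCRATCH,retry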
*/

void
alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
		       rtx after, rtx scratch, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));

  alpha_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (DImode, label);

  if (before == NULL)
    before = scratch;
  emit_insn (gen_load_locked (mode, before, mem));

  if (code == NOT)
    {
      /* Build the NAND without clobbering VAL, which must stay live in
	 case the store-conditional fails and we loop back.  */
      x = gen_rtx_AND (mode, before, val);
      x = gen_rtx_NOT (mode, x);
    }
  else
    x = gen_rtx_fmt_ee (code, mode, before, val);
  if (after)
    emit_insn (gen_rtx_SET (after, copy_rtx (x)));
  emit_insn (gen_rtx_SET (scratch, x));

  emit_insn (gen_store_conditional (mode, cond, mem, scratch));

  x = gen_rtx_EQ (DImode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  alpha_post_atomic_barrier (model);
}

/* Expand a compare and swap operation.  */

void
alpha_split_compare_and_swap (rtx operands[])
{
  rtx cond, retval, mem, oldval, newval;
  bool is_weak;
  enum memmodel mod_s, mod_f;
  machine_mode mode;
  rtx label1, label2, x;

  cond = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  mode = GET_MODE (mem);

  alpha_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());

  emit_insn (gen_load_locked (mode, retval, mem));

  x = gen_lowpart (DImode, retval);
  if (oldval == const0_rtx)
    {
      emit_move_insn (cond, const0_rtx);
      x = gen_rtx_NE (DImode, x, const0_rtx);
    }
  else
    {
      x = gen_rtx_EQ (DImode, x, oldval);
      emit_insn (gen_rtx_SET (cond, x));
      x = gen_rtx_EQ (DImode, cond, const0_rtx);
    }
  emit_unlikely_jump (x, label2);

  emit_move_insn (cond, newval);
  emit_insn (gen_store_conditional
	     (mode, cond, mem, gen_lowpart (mode, cond)));

  if (!is_weak)
    {
      x = gen_rtx_EQ (DImode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  alpha_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));
}

void
alpha_expand_compare_and_swap_12 (rtx operands[])
{
  rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
  machine_mode mode;
  rtx addr, align, wdst;

  cond = operands[0];
  dst = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* We forced the address into a register via mem_noofs_operand.
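     The ldx_l/stx_c pair used by the splitter must operate on the
     containing aligned quadword, which is why ALIGN below masks away the
     low three address bits.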
*/ 4522 addr = XEXP (mem, 0); 4523 gcc_assert (register_operand (addr, DImode)); 4524 4525 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), 4526 NULL_RTX, 1, OPTAB_DIRECT); 4527 4528 oldval = convert_modes (DImode, mode, oldval, 1); 4529 4530 if (newval != const0_rtx) 4531 newval = emit_insxl (mode, newval, addr); 4532 4533 wdst = gen_reg_rtx (DImode); 4534 emit_insn (gen_atomic_compare_and_swap_1 4535 (mode, cond, wdst, mem, oldval, newval, align, 4536 is_weak, mod_s, mod_f)); 4537 4538 emit_move_insn (dst, gen_lowpart (mode, wdst)); 4539} 4540 4541void 4542alpha_split_compare_and_swap_12 (rtx operands[]) 4543{ 4544 rtx cond, dest, orig_mem, oldval, newval, align, scratch; 4545 machine_mode mode; 4546 bool is_weak; 4547 enum memmodel mod_s, mod_f; 4548 rtx label1, label2, mem, addr, width, mask, x; 4549 4550 cond = operands[0]; 4551 dest = operands[1]; 4552 orig_mem = operands[2]; 4553 oldval = operands[3]; 4554 newval = operands[4]; 4555 align = operands[5]; 4556 is_weak = (operands[6] != const0_rtx); 4557 mod_s = memmodel_from_int (INTVAL (operands[7])); 4558 mod_f = memmodel_from_int (INTVAL (operands[8])); 4559 scratch = operands[9]; 4560 mode = GET_MODE (orig_mem); 4561 addr = XEXP (orig_mem, 0); 4562 4563 mem = gen_rtx_MEM (DImode, align); 4564 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); 4565 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) 4566 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); 4567 4568 alpha_pre_atomic_barrier (mod_s); 4569 4570 label1 = NULL_RTX; 4571 if (!is_weak) 4572 { 4573 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4574 emit_label (XEXP (label1, 0)); 4575 } 4576 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4577 4578 emit_insn (gen_load_locked (DImode, scratch, mem)); 4579 4580 width = GEN_INT (GET_MODE_BITSIZE (mode)); 4581 mask = GEN_INT (mode == QImode ? 0xff : 0xffff); 4582 emit_insn (gen_extxl (dest, scratch, width, addr)); 4583 4584 if (oldval == const0_rtx) 4585 { 4586 emit_move_insn (cond, const0_rtx); 4587 x = gen_rtx_NE (DImode, dest, const0_rtx); 4588 } 4589 else 4590 { 4591 x = gen_rtx_EQ (DImode, dest, oldval); 4592 emit_insn (gen_rtx_SET (cond, x)); 4593 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4594 } 4595 emit_unlikely_jump (x, label2); 4596 4597 emit_insn (gen_mskxl (cond, scratch, mask, addr)); 4598 4599 if (newval != const0_rtx) 4600 emit_insn (gen_iordi3 (cond, cond, newval)); 4601 4602 emit_insn (gen_store_conditional (DImode, cond, mem, cond)); 4603 4604 if (!is_weak) 4605 { 4606 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4607 emit_unlikely_jump (x, label1); 4608 } 4609 4610 if (!is_mm_relaxed (mod_f)) 4611 emit_label (XEXP (label2, 0)); 4612 4613 alpha_post_atomic_barrier (mod_s); 4614 4615 if (is_mm_relaxed (mod_f)) 4616 emit_label (XEXP (label2, 0)); 4617} 4618 4619/* Expand an atomic exchange operation. 
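   For the full-word cases the split form is roughly

	retry:	ldq_l	RETVAL,0(MEM)
		mov	VAL,SCRATCH
		stq_c	SCRATCH,0(MEM)
		beq	SCRATCH,retry

   (ldl_l/stl_c for SImode), bracketed by whatever barriers MODEL
   requires; the 1- and 2-byte variants do the same within the containing
   quadword using extxl/mskxl.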
*/ 4620 4621void 4622alpha_split_atomic_exchange (rtx operands[]) 4623{ 4624 rtx retval, mem, val, scratch; 4625 enum memmodel model; 4626 machine_mode mode; 4627 rtx label, x, cond; 4628 4629 retval = operands[0]; 4630 mem = operands[1]; 4631 val = operands[2]; 4632 model = (enum memmodel) INTVAL (operands[3]); 4633 scratch = operands[4]; 4634 mode = GET_MODE (mem); 4635 cond = gen_lowpart (DImode, scratch); 4636 4637 alpha_pre_atomic_barrier (model); 4638 4639 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4640 emit_label (XEXP (label, 0)); 4641 4642 emit_insn (gen_load_locked (mode, retval, mem)); 4643 emit_move_insn (scratch, val); 4644 emit_insn (gen_store_conditional (mode, cond, mem, scratch)); 4645 4646 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4647 emit_unlikely_jump (x, label); 4648 4649 alpha_post_atomic_barrier (model); 4650} 4651 4652void 4653alpha_expand_atomic_exchange_12 (rtx operands[]) 4654{ 4655 rtx dst, mem, val, model; 4656 machine_mode mode; 4657 rtx addr, align, wdst; 4658 4659 dst = operands[0]; 4660 mem = operands[1]; 4661 val = operands[2]; 4662 model = operands[3]; 4663 mode = GET_MODE (mem); 4664 4665 /* We forced the address into a register via mem_noofs_operand. */ 4666 addr = XEXP (mem, 0); 4667 gcc_assert (register_operand (addr, DImode)); 4668 4669 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), 4670 NULL_RTX, 1, OPTAB_DIRECT); 4671 4672 /* Insert val into the correct byte location within the word. */ 4673 if (val != const0_rtx) 4674 val = emit_insxl (mode, val, addr); 4675 4676 wdst = gen_reg_rtx (DImode); 4677 emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model)); 4678 4679 emit_move_insn (dst, gen_lowpart (mode, wdst)); 4680} 4681 4682void 4683alpha_split_atomic_exchange_12 (rtx operands[]) 4684{ 4685 rtx dest, orig_mem, addr, val, align, scratch; 4686 rtx label, mem, width, mask, x; 4687 machine_mode mode; 4688 enum memmodel model; 4689 4690 dest = operands[0]; 4691 orig_mem = operands[1]; 4692 val = operands[2]; 4693 align = operands[3]; 4694 model = (enum memmodel) INTVAL (operands[4]); 4695 scratch = operands[5]; 4696 mode = GET_MODE (orig_mem); 4697 addr = XEXP (orig_mem, 0); 4698 4699 mem = gen_rtx_MEM (DImode, align); 4700 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); 4701 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) 4702 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); 4703 4704 alpha_pre_atomic_barrier (model); 4705 4706 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4707 emit_label (XEXP (label, 0)); 4708 4709 emit_insn (gen_load_locked (DImode, scratch, mem)); 4710 4711 width = GEN_INT (GET_MODE_BITSIZE (mode)); 4712 mask = GEN_INT (mode == QImode ? 0xff : 0xffff); 4713 emit_insn (gen_extxl (dest, scratch, width, addr)); 4714 emit_insn (gen_mskxl (scratch, scratch, mask, addr)); 4715 if (val != const0_rtx) 4716 emit_insn (gen_iordi3 (scratch, scratch, val)); 4717 4718 emit_insn (gen_store_conditional (DImode, scratch, mem, scratch)); 4719 4720 x = gen_rtx_EQ (DImode, scratch, const0_rtx); 4721 emit_unlikely_jump (x, label); 4722 4723 alpha_post_atomic_barrier (model); 4724} 4725 4726/* Adjust the cost of a scheduling dependency. Return the new cost of 4727 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 4728 4729static int 4730alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, 4731 unsigned int) 4732{ 4733 enum attr_type dep_insn_type; 4734 4735 /* If the dependence is an anti-dependence, there is no cost. 
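     (No result has to be forwarded in that case: the dependent insn merely
     overwrites a register that the earlier insn only reads.)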
For an
     output dependence, there is sometimes a cost, but it doesn't seem
     worth handling those few cases.  */
  if (dep_type != 0)
    return cost;

  /* If we can't recognize the insns, we can't really do anything.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  dep_insn_type = get_attr_type (dep_insn);

  /* Bring in the user-defined memory latency.  */
  if (dep_insn_type == TYPE_ILD
      || dep_insn_type == TYPE_FLD
      || dep_insn_type == TYPE_LDSYM)
    cost += alpha_memory_latency-1;

  /* Everything else handled in DFA bypasses now.  */

  return cost;
}

/* The number of instructions that can be issued per cycle.  */

static int
alpha_issue_rate (void)
{
  return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.

   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
   alternative schedules.  For EV5, we can choose between E0/E1 and
   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */

static int
alpha_multipass_dfa_lookahead (void)
{
  return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
}

/* Machine-specific function data.  */

struct GTY(()) alpha_links;

struct GTY(()) machine_function
{
  unsigned HOST_WIDE_INT sa_mask;
  HOST_WIDE_INT sa_size;
  HOST_WIDE_INT frame_size;

  /* For flag_reorder_blocks_and_partition.  */
  rtx gp_save_rtx;

  /* For VMS condition handlers.  */
  bool uses_condition_handler;

  /* Linkage entries.  */
  hash_map<nofree_string_hash, alpha_links *> *links;
};

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
alpha_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Support for frame based VMS condition handlers.  */

/* A VMS condition handler may be established for a function with a call to
   __builtin_establish_vms_condition_handler, and cancelled with a call to
   __builtin_revert_vms_condition_handler.

   The VMS Condition Handling Facility knows about the existence of a handler
   from the procedure descriptor .handler field.  Like the VMS native
   compilers, we store the user-specified handler's address at a fixed
   location in the stack frame and point the procedure descriptor at a
   common wrapper which fetches the real handler's address and issues an
   indirect call.

   The indirection wrapper is "__gcc_shell_handler", provided by libgcc.

   We force the procedure kind to PT_STACK, and the fixed frame location is
   fp+8, just before the register save area.  We use the handler_data field
   in the procedure descriptor to state the fp offset at which the installed
   handler address can be found.  */

#define VMS_COND_HANDLER_FP_OFFSET 8

/* Expand code to store the currently installed user VMS condition handler
   into TARGET and install HANDLER as the new condition handler.
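   An illustrative use from user code (the prototypes here are assumptions,
   not the documented interface; each builtin expands to one load and one
   store through the fp+8 slot):

     void *prev = __builtin_establish_vms_condition_handler (new_handler);
     ...
     __builtin_revert_vms_condition_handler ();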
*/

void
alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
{
  rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
					    VMS_COND_HANDLER_FP_OFFSET);

  rtx handler_slot
    = gen_rtx_MEM (DImode, handler_slot_address);

  emit_move_insn (target, handler_slot);
  emit_move_insn (handler_slot, handler);

  /* Notify the start/prologue/epilogue emitters that the condition handler
     slot is needed.  In addition to reserving the slot space, this forces
     the procedure kind to PT_STACK, ensuring that the
     hard_frame_pointer_rtx use above is correct.  */
  cfun->machine->uses_condition_handler = true;
}

/* Expand code to store the current VMS condition handler into TARGET and
   nullify it.  */

void
alpha_expand_builtin_revert_vms_condition_handler (rtx target)
{
  /* We implement this by establishing a null condition handler, with the
     tiny side effect of setting uses_condition_handler.  This is a little
     bit pessimistic if no actual builtin_establish call is ever issued,
     which is not a real problem and expected never to happen anyway.  */

  alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
}

/* Functions to save and restore alpha_return_addr_rtx.  */

/* Start the ball rolling with RETURN_ADDR_RTX.  */

rtx
alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;

  return get_hard_reg_initial_val (Pmode, REG_RA);
}

/* Return or create a memory slot containing the gp value for the current
   function.  Needed only if TARGET_LD_BUGGY_LDGP.  */

rtx
alpha_gp_save_rtx (void)
{
  rtx_insn *seq;
  rtx m = cfun->machine->gp_save_rtx;

  if (m == NULL)
    {
      start_sequence ();

      m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
      m = validize_mem (m);
      emit_move_insn (m, pic_offset_table_rtx);

      seq = get_insns ();
      end_sequence ();

      /* We used to simply emit the sequence after entry_of_function.
	 However this breaks the CFG if the first instruction in the
	 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
	 label.  Emit the sequence properly on the edge.  We are only
	 invoked from dw2_build_landing_pads and finish_eh_generation
	 will call commit_edge_insertions thanks to a kludge.  */
      insert_insn_on_edge (seq,
			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));

      cfun->machine->gp_save_rtx = m;
    }

  return m;
}

static void
alpha_instantiate_decls (void)
{
  if (cfun->machine->gp_save_rtx != NULL_RTX)
    instantiate_decl_rtl (cfun->machine->gp_save_rtx);
}

static int
alpha_ra_ever_killed (void)
{
  rtx_insn *top;

  if (!has_hard_reg_initial_val (Pmode, REG_RA))
    return (int)df_regs_ever_live_p (REG_RA);

  push_topmost_sequence ();
  top = get_insns ();
  pop_topmost_sequence ();

  return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
}


/* Return the trap mode suffix applicable to the current
   instruction, or NULL.
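   For example, with -mfp-trap-mode=su an FP insn whose trap_suffix
   attribute is u_su_sui is printed with the /su qualifier, as in
   "addt/su".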
*/ 4938 4939static const char * 4940get_trap_mode_suffix (void) 4941{ 4942 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); 4943 4944 switch (s) 4945 { 4946 case TRAP_SUFFIX_NONE: 4947 return NULL; 4948 4949 case TRAP_SUFFIX_SU: 4950 if (alpha_fptm >= ALPHA_FPTM_SU) 4951 return "su"; 4952 return NULL; 4953 4954 case TRAP_SUFFIX_SUI: 4955 if (alpha_fptm >= ALPHA_FPTM_SUI) 4956 return "sui"; 4957 return NULL; 4958 4959 case TRAP_SUFFIX_V_SV: 4960 switch (alpha_fptm) 4961 { 4962 case ALPHA_FPTM_N: 4963 return NULL; 4964 case ALPHA_FPTM_U: 4965 return "v"; 4966 case ALPHA_FPTM_SU: 4967 case ALPHA_FPTM_SUI: 4968 return "sv"; 4969 default: 4970 gcc_unreachable (); 4971 } 4972 4973 case TRAP_SUFFIX_V_SV_SVI: 4974 switch (alpha_fptm) 4975 { 4976 case ALPHA_FPTM_N: 4977 return NULL; 4978 case ALPHA_FPTM_U: 4979 return "v"; 4980 case ALPHA_FPTM_SU: 4981 return "sv"; 4982 case ALPHA_FPTM_SUI: 4983 return "svi"; 4984 default: 4985 gcc_unreachable (); 4986 } 4987 break; 4988 4989 case TRAP_SUFFIX_U_SU_SUI: 4990 switch (alpha_fptm) 4991 { 4992 case ALPHA_FPTM_N: 4993 return NULL; 4994 case ALPHA_FPTM_U: 4995 return "u"; 4996 case ALPHA_FPTM_SU: 4997 return "su"; 4998 case ALPHA_FPTM_SUI: 4999 return "sui"; 5000 default: 5001 gcc_unreachable (); 5002 } 5003 break; 5004 5005 default: 5006 gcc_unreachable (); 5007 } 5008 gcc_unreachable (); 5009} 5010 5011/* Return the rounding mode suffix applicable to the current 5012 instruction, or NULL. */ 5013 5014static const char * 5015get_round_mode_suffix (void) 5016{ 5017 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); 5018 5019 switch (s) 5020 { 5021 case ROUND_SUFFIX_NONE: 5022 return NULL; 5023 case ROUND_SUFFIX_NORMAL: 5024 switch (alpha_fprm) 5025 { 5026 case ALPHA_FPRM_NORM: 5027 return NULL; 5028 case ALPHA_FPRM_MINF: 5029 return "m"; 5030 case ALPHA_FPRM_CHOP: 5031 return "c"; 5032 case ALPHA_FPRM_DYN: 5033 return "d"; 5034 default: 5035 gcc_unreachable (); 5036 } 5037 break; 5038 5039 case ROUND_SUFFIX_C: 5040 return "c"; 5041 5042 default: 5043 gcc_unreachable (); 5044 } 5045 gcc_unreachable (); 5046} 5047 5048/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ 5049 5050static bool 5051alpha_print_operand_punct_valid_p (unsigned char code) 5052{ 5053 return (code == '/' || code == ',' || code == '-' || code == '~' 5054 || code == '#' || code == '*' || code == '&'); 5055} 5056 5057/* Implement TARGET_PRINT_OPERAND. The alpha-specific 5058 operand codes are documented below. */ 5059 5060static void 5061alpha_print_operand (FILE *file, rtx x, int code) 5062{ 5063 int i; 5064 5065 switch (code) 5066 { 5067 case '~': 5068 /* Print the assembler name of the current function. */ 5069 assemble_name (file, alpha_fnname); 5070 break; 5071 5072 case '&': 5073 if (const char *name = get_some_local_dynamic_name ()) 5074 assemble_name (file, name); 5075 else 5076 output_operand_lossage ("'%%&' used without any " 5077 "local dynamic TLS references"); 5078 break; 5079 5080 case '/': 5081 /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX 5082 attributes are examined to determine what is appropriate. */ 5083 { 5084 const char *trap = get_trap_mode_suffix (); 5085 const char *round = get_round_mode_suffix (); 5086 5087 if (trap || round) 5088 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : "")); 5089 break; 5090 } 5091 5092 case ',': 5093 /* Generates single precision suffix for floating point 5094 instructions (s for IEEE, f for VAX). */ 5095 fputc ((TARGET_FLOAT_VAX ? 
'f' : 's'), file); 5096 break; 5097 5098 case '-': 5099 /* Generates double precision suffix for floating point 5100 instructions (t for IEEE, g for VAX). */ 5101 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file); 5102 break; 5103 5104 case '#': 5105 if (alpha_this_literal_sequence_number == 0) 5106 alpha_this_literal_sequence_number = alpha_next_sequence_number++; 5107 fprintf (file, "%d", alpha_this_literal_sequence_number); 5108 break; 5109 5110 case '*': 5111 if (alpha_this_gpdisp_sequence_number == 0) 5112 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++; 5113 fprintf (file, "%d", alpha_this_gpdisp_sequence_number); 5114 break; 5115 5116 case 'J': 5117 { 5118 const char *lituse; 5119 5120 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) 5121 { 5122 x = XVECEXP (x, 0, 0); 5123 lituse = "lituse_tlsgd"; 5124 } 5125 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) 5126 { 5127 x = XVECEXP (x, 0, 0); 5128 lituse = "lituse_tlsldm"; 5129 } 5130 else if (CONST_INT_P (x)) 5131 lituse = "lituse_jsr"; 5132 else 5133 { 5134 output_operand_lossage ("invalid %%J value"); 5135 break; 5136 } 5137 5138 if (x != const0_rtx) 5139 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); 5140 } 5141 break; 5142 5143 case 'j': 5144 { 5145 const char *lituse; 5146 5147#ifdef HAVE_AS_JSRDIRECT_RELOCS 5148 lituse = "lituse_jsrdirect"; 5149#else 5150 lituse = "lituse_jsr"; 5151#endif 5152 5153 gcc_assert (INTVAL (x) != 0); 5154 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); 5155 } 5156 break; 5157 case 'r': 5158 /* If this operand is the constant zero, write it as "$31". */ 5159 if (REG_P (x)) 5160 fprintf (file, "%s", reg_names[REGNO (x)]); 5161 else if (x == CONST0_RTX (GET_MODE (x))) 5162 fprintf (file, "$31"); 5163 else 5164 output_operand_lossage ("invalid %%r value"); 5165 break; 5166 5167 case 'R': 5168 /* Similar, but for floating-point. */ 5169 if (REG_P (x)) 5170 fprintf (file, "%s", reg_names[REGNO (x)]); 5171 else if (x == CONST0_RTX (GET_MODE (x))) 5172 fprintf (file, "$f31"); 5173 else 5174 output_operand_lossage ("invalid %%R value"); 5175 break; 5176 5177 case 'N': 5178 /* Write the 1's complement of a constant. */ 5179 if (!CONST_INT_P (x)) 5180 output_operand_lossage ("invalid %%N value"); 5181 5182 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); 5183 break; 5184 5185 case 'P': 5186 /* Write 1 << C, for a constant C. */ 5187 if (!CONST_INT_P (x)) 5188 output_operand_lossage ("invalid %%P value"); 5189 5190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x)); 5191 break; 5192 5193 case 'h': 5194 /* Write the high-order 16 bits of a constant, sign-extended. */ 5195 if (!CONST_INT_P (x)) 5196 output_operand_lossage ("invalid %%h value"); 5197 5198 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); 5199 break; 5200 5201 case 'L': 5202 /* Write the low-order 16 bits of a constant, sign-extended. */ 5203 if (!CONST_INT_P (x)) 5204 output_operand_lossage ("invalid %%L value"); 5205 5206 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 5207 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); 5208 break; 5209 5210 case 'm': 5211 /* Write mask for ZAP insn. 
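	 For example, the value 0x00000000ffff0000 has nonzero bytes 2 and 3
	 only, so the mask printed is 12 (bits 2 and 3 set).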
*/ 5212 if (CONST_INT_P (x)) 5213 { 5214 HOST_WIDE_INT mask = 0, value = INTVAL (x); 5215 5216 for (i = 0; i < 8; i++, value >>= 8) 5217 if (value & 0xff) 5218 mask |= (1 << i); 5219 5220 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); 5221 } 5222 else 5223 output_operand_lossage ("invalid %%m value"); 5224 break; 5225 5226 case 'M': 5227 /* 'b', 'w', 'l', or 'q' as the value of the constant. */ 5228 if (!mode_width_operand (x, VOIDmode)) 5229 output_operand_lossage ("invalid %%M value"); 5230 5231 fprintf (file, "%s", 5232 (INTVAL (x) == 8 ? "b" 5233 : INTVAL (x) == 16 ? "w" 5234 : INTVAL (x) == 32 ? "l" 5235 : "q")); 5236 break; 5237 5238 case 'U': 5239 /* Similar, except do it from the mask. */ 5240 if (CONST_INT_P (x)) 5241 { 5242 HOST_WIDE_INT value = INTVAL (x); 5243 5244 if (value == 0xff) 5245 { 5246 fputc ('b', file); 5247 break; 5248 } 5249 if (value == 0xffff) 5250 { 5251 fputc ('w', file); 5252 break; 5253 } 5254 if (value == 0xffffffff) 5255 { 5256 fputc ('l', file); 5257 break; 5258 } 5259 if (value == -1) 5260 { 5261 fputc ('q', file); 5262 break; 5263 } 5264 } 5265 5266 output_operand_lossage ("invalid %%U value"); 5267 break; 5268 5269 case 's': 5270 /* Write the constant value divided by 8. */ 5271 if (!CONST_INT_P (x) 5272 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 5273 || (INTVAL (x) & 7) != 0) 5274 output_operand_lossage ("invalid %%s value"); 5275 5276 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); 5277 break; 5278 5279 case 'C': case 'D': case 'c': case 'd': 5280 /* Write out comparison name. */ 5281 { 5282 enum rtx_code c = GET_CODE (x); 5283 5284 if (!COMPARISON_P (x)) 5285 output_operand_lossage ("invalid %%C value"); 5286 5287 else if (code == 'D') 5288 c = reverse_condition (c); 5289 else if (code == 'c') 5290 c = swap_condition (c); 5291 else if (code == 'd') 5292 c = swap_condition (reverse_condition (c)); 5293 5294 if (c == LEU) 5295 fprintf (file, "ule"); 5296 else if (c == LTU) 5297 fprintf (file, "ult"); 5298 else if (c == UNORDERED) 5299 fprintf (file, "un"); 5300 else 5301 fprintf (file, "%s", GET_RTX_NAME (c)); 5302 } 5303 break; 5304 5305 case 'E': 5306 /* Write the divide or modulus operator. */ 5307 switch (GET_CODE (x)) 5308 { 5309 case DIV: 5310 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q"); 5311 break; 5312 case UDIV: 5313 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q"); 5314 break; 5315 case MOD: 5316 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q"); 5317 break; 5318 case UMOD: 5319 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q"); 5320 break; 5321 default: 5322 output_operand_lossage ("invalid %%E value"); 5323 break; 5324 } 5325 break; 5326 5327 case 'A': 5328 /* Write "_u" for unaligned access. */ 5329 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) 5330 fprintf (file, "_u"); 5331 break; 5332 5333 case 0: 5334 if (REG_P (x)) 5335 fprintf (file, "%s", reg_names[REGNO (x)]); 5336 else if (MEM_P (x)) 5337 output_address (GET_MODE (x), XEXP (x, 0)); 5338 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) 5339 { 5340 switch (XINT (XEXP (x, 0), 1)) 5341 { 5342 case UNSPEC_DTPREL: 5343 case UNSPEC_TPREL: 5344 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); 5345 break; 5346 default: 5347 output_operand_lossage ("unknown relocation unspec"); 5348 break; 5349 } 5350 } 5351 else 5352 output_addr_const (file, x); 5353 break; 5354 5355 default: 5356 output_operand_lossage ("invalid %%xn code"); 5357 } 5358} 5359 5360/* Implement TARGET_PRINT_OPERAND_ADDRESS. 
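   The cases below handle a register, a register plus constant offset, a
   LO_SUM carrying an explicit relocation (gprel/dtprel/tprel), and bare
   symbolic addresses for VMS and asm operands.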
*/ 5361 5362static void 5363alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) 5364{ 5365 int basereg = 31; 5366 HOST_WIDE_INT offset = 0; 5367 5368 if (GET_CODE (addr) == AND) 5369 addr = XEXP (addr, 0); 5370 5371 if (GET_CODE (addr) == PLUS 5372 && CONST_INT_P (XEXP (addr, 1))) 5373 { 5374 offset = INTVAL (XEXP (addr, 1)); 5375 addr = XEXP (addr, 0); 5376 } 5377 5378 if (GET_CODE (addr) == LO_SUM) 5379 { 5380 const char *reloc16, *reloclo; 5381 rtx op1 = XEXP (addr, 1); 5382 5383 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) 5384 { 5385 op1 = XEXP (op1, 0); 5386 switch (XINT (op1, 1)) 5387 { 5388 case UNSPEC_DTPREL: 5389 reloc16 = NULL; 5390 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello"); 5391 break; 5392 case UNSPEC_TPREL: 5393 reloc16 = NULL; 5394 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello"); 5395 break; 5396 default: 5397 output_operand_lossage ("unknown relocation unspec"); 5398 return; 5399 } 5400 5401 output_addr_const (file, XVECEXP (op1, 0, 0)); 5402 } 5403 else 5404 { 5405 reloc16 = "gprel"; 5406 reloclo = "gprellow"; 5407 output_addr_const (file, op1); 5408 } 5409 5410 if (offset) 5411 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); 5412 5413 addr = XEXP (addr, 0); 5414 switch (GET_CODE (addr)) 5415 { 5416 case REG: 5417 basereg = REGNO (addr); 5418 break; 5419 5420 case SUBREG: 5421 basereg = subreg_regno (addr); 5422 break; 5423 5424 default: 5425 gcc_unreachable (); 5426 } 5427 5428 fprintf (file, "($%d)\t\t!%s", basereg, 5429 (basereg == 29 ? reloc16 : reloclo)); 5430 return; 5431 } 5432 5433 switch (GET_CODE (addr)) 5434 { 5435 case REG: 5436 basereg = REGNO (addr); 5437 break; 5438 5439 case SUBREG: 5440 basereg = subreg_regno (addr); 5441 break; 5442 5443 case CONST_INT: 5444 offset = INTVAL (addr); 5445 break; 5446 5447 case SYMBOL_REF: 5448 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands); 5449 fprintf (file, "%s", XSTR (addr, 0)); 5450 return; 5451 5452 case CONST: 5453 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands); 5454 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS 5455 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); 5456 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, 5457 XSTR (XEXP (XEXP (addr, 0), 0), 0), 5458 INTVAL (XEXP (XEXP (addr, 0), 1))); 5459 return; 5460 5461 default: 5462 output_operand_lossage ("invalid operand address"); 5463 return; 5464 } 5465 5466 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); 5467} 5468 5469/* Emit RTL insns to initialize the variable parts of a trampoline at 5470 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx 5471 for the static chain value for the function. */ 5472 5473static void 5474alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 5475{ 5476 rtx fnaddr, mem, word1, word2; 5477 5478 fnaddr = XEXP (DECL_RTL (fndecl), 0); 5479 5480#ifdef POINTERS_EXTEND_UNSIGNED 5481 fnaddr = convert_memory_address (Pmode, fnaddr); 5482 chain_value = convert_memory_address (Pmode, chain_value); 5483#endif 5484 5485 if (TARGET_ABI_OPEN_VMS) 5486 { 5487 const char *fnname; 5488 char *trname; 5489 5490 /* Construct the name of the trampoline entry point. 
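	 For a function FOO the entry point used is named FOO..tr.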
*/ 5491 fnname = XSTR (fnaddr, 0); 5492 trname = (char *) alloca (strlen (fnname) + 5); 5493 strcpy (trname, fnname); 5494 strcat (trname, "..tr"); 5495 fnname = ggc_alloc_string (trname, strlen (trname) + 1); 5496 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname); 5497 5498 /* Trampoline (or "bounded") procedure descriptor is constructed from 5499 the function's procedure descriptor with certain fields zeroed IAW 5500 the VMS calling standard. This is stored in the first quadword. */ 5501 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr)); 5502 word1 = expand_and (DImode, word1, 5503 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)), 5504 NULL); 5505 } 5506 else 5507 { 5508 /* These 4 instructions are: 5509 ldq $1,24($27) 5510 ldq $27,16($27) 5511 jmp $31,($27),0 5512 nop 5513 We don't bother setting the HINT field of the jump; the nop 5514 is merely there for padding. */ 5515 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018)); 5516 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000)); 5517 } 5518 5519 /* Store the first two words, as computed above. */ 5520 mem = adjust_address (m_tramp, DImode, 0); 5521 emit_move_insn (mem, word1); 5522 mem = adjust_address (m_tramp, DImode, 8); 5523 emit_move_insn (mem, word2); 5524 5525 /* Store function address and static chain value. */ 5526 mem = adjust_address (m_tramp, Pmode, 16); 5527 emit_move_insn (mem, fnaddr); 5528 mem = adjust_address (m_tramp, Pmode, 24); 5529 emit_move_insn (mem, chain_value); 5530 5531 if (TARGET_ABI_OSF) 5532 { 5533 emit_insn (gen_imb ()); 5534#ifdef HAVE_ENABLE_EXECUTE_STACK 5535 emit_library_call (init_one_libfunc ("__enable_execute_stack"), 5536 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); 5537#endif 5538 } 5539} 5540 5541/* Determine where to put an argument to a function. 5542 Value is zero to push the argument on the stack, 5543 or a hard register in which to store the argument. 5544 5545 CUM is a variable of type CUMULATIVE_ARGS which gives info about 5546 the preceding args and about the function being called. 5547 ARG is a description of the argument. 5548 5549 On Alpha the first 6 words of args are normally in registers 5550 and the rest are pushed. */ 5551 5552static rtx 5553alpha_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) 5554{ 5555 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5556 int basereg; 5557 int num_args; 5558 5559 /* Don't get confused and pass small structures in FP registers. */ 5560 if (arg.aggregate_type_p ()) 5561 basereg = 16; 5562 else 5563 { 5564 /* With alpha_split_complex_arg, we shouldn't see any raw complex 5565 values here. */ 5566 gcc_checking_assert (!COMPLEX_MODE_P (arg.mode)); 5567 5568 /* Set up defaults for FP operands passed in FP registers, and 5569 integral operands passed in integer registers. */ 5570 if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT) 5571 basereg = 32 + 16; 5572 else 5573 basereg = 16; 5574 } 5575 5576 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for 5577 the two platforms, so we can't avoid conditional compilation. 
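     (On OSF, CUMULATIVE_ARGS is a plain int counting argument words; on
     VMS it is a structure recording num_args together with the per-slot
     argument types used to build the argument-information register.)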
*/ 5578#if TARGET_ABI_OPEN_VMS 5579 { 5580 if (arg.end_marker_p ()) 5581 return alpha_arg_info_reg_val (*cum); 5582 5583 num_args = cum->num_args; 5584 if (num_args >= 6 5585 || targetm.calls.must_pass_in_stack (arg)) 5586 return NULL_RTX; 5587 } 5588#elif TARGET_ABI_OSF 5589 { 5590 if (*cum >= 6) 5591 return NULL_RTX; 5592 num_args = *cum; 5593 5594 if (arg.end_marker_p ()) 5595 basereg = 16; 5596 else if (targetm.calls.must_pass_in_stack (arg)) 5597 return NULL_RTX; 5598 } 5599#else 5600#error Unhandled ABI 5601#endif 5602 5603 return gen_rtx_REG (arg.mode, num_args + basereg); 5604} 5605 5606/* Update the data in CUM to advance over argument ARG. */ 5607 5608static void 5609alpha_function_arg_advance (cumulative_args_t cum_v, 5610 const function_arg_info &arg) 5611{ 5612 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5613 bool onstack = targetm.calls.must_pass_in_stack (arg); 5614 int increment = onstack ? 6 : ALPHA_ARG_SIZE (arg.mode, arg.type); 5615 5616#if TARGET_ABI_OSF 5617 *cum += increment; 5618#else 5619 if (!onstack && cum->num_args < 6) 5620 cum->atypes[cum->num_args] = alpha_arg_type (arg.mode); 5621 cum->num_args += increment; 5622#endif 5623} 5624 5625static int 5626alpha_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) 5627{ 5628 int words = 0; 5629 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); 5630 5631#if TARGET_ABI_OPEN_VMS 5632 if (cum->num_args < 6 5633 && 6 < cum->num_args + ALPHA_ARG_SIZE (arg.mode, arg.type)) 5634 words = 6 - cum->num_args; 5635#elif TARGET_ABI_OSF 5636 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (arg.mode, arg.type)) 5637 words = 6 - *cum; 5638#else 5639#error Unhandled ABI 5640#endif 5641 5642 return words * UNITS_PER_WORD; 5643} 5644 5645 5646/* Return true if TYPE must be returned in memory, instead of in registers. */ 5647 5648static bool 5649alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) 5650{ 5651 machine_mode mode = VOIDmode; 5652 int size; 5653 5654 if (type) 5655 { 5656 mode = TYPE_MODE (type); 5657 5658 /* All aggregates are returned in memory, except on OpenVMS where 5659 records that fit 64 bits should be returned by immediate value 5660 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ 5661 if (TARGET_ABI_OPEN_VMS 5662 && TREE_CODE (type) != ARRAY_TYPE 5663 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8) 5664 return false; 5665 5666 if (AGGREGATE_TYPE_P (type)) 5667 return true; 5668 } 5669 5670 size = GET_MODE_SIZE (mode); 5671 switch (GET_MODE_CLASS (mode)) 5672 { 5673 case MODE_VECTOR_FLOAT: 5674 /* Pass all float vectors in memory, like an aggregate. */ 5675 return true; 5676 5677 case MODE_COMPLEX_FLOAT: 5678 /* We judge complex floats on the size of their element, 5679 not the size of the whole type. */ 5680 size = GET_MODE_UNIT_SIZE (mode); 5681 break; 5682 5683 case MODE_INT: 5684 case MODE_FLOAT: 5685 case MODE_COMPLEX_INT: 5686 case MODE_VECTOR_INT: 5687 break; 5688 5689 default: 5690 /* ??? We get called on all sorts of random stuff from 5691 aggregate_value_p. We must return something, but it's not 5692 clear what's safe to return. Pretend it's a struct I 5693 guess. */ 5694 return true; 5695 } 5696 5697 /* Otherwise types must fit in one register. */ 5698 return size > UNITS_PER_WORD; 5699} 5700 5701/* Return true if ARG should be passed by invisible reference. 
*/

static bool
alpha_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  /* Pass float and _Complex float variable arguments by reference.
     This avoids a 64-bit store from an FP register to the pretend-args
     save area and a subsequent 32-bit load from the saved location back
     to an FP register.

     Note that 32-bit loads and stores to/from an FP register on alpha
     reorder bits to form a canonical 64-bit value in the FP register.
     This fact invalidates the compiler's assumption that the 32-bit FP
     value lives in the lower 32 bits of the passed 64-bit FP value, so
     loading the 32-bit value from the stored 64-bit location using a
     32-bit FP load is invalid on alpha.

     This introduces a sort of ABI incompatibility, but until _Float32 was
     introduced, C-family languages promoted a 32-bit float variable
     argument to a 64-bit double, and it was not allowed to pass float as
     a variable argument.  Passing _Complex float as a variable argument
     never worked on alpha.  Thus, we have no backward compatibility
     issues to worry about, and passing unpromoted _Float32 and _Complex
     float as a variable argument will actually work in the future.  */

  if (arg.mode == SFmode || arg.mode == SCmode)
    return !arg.named;

  return arg.mode == TFmode || arg.mode == TCmode;
}

/* Define how to find the value returned by a function.  VALTYPE is the
   data type of the value (as a tree).  If the precise function being
   called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
   MODE is set instead of VALTYPE for libcalls.

   On Alpha the value is found in $0 for integer functions and
   $f0 for floating-point functions.  */

static rtx
alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
			machine_mode mode)
{
  unsigned int regnum, dummy ATTRIBUTE_UNUSED;
  enum mode_class mclass;

  gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));

  if (valtype)
    mode = TYPE_MODE (valtype);

  mclass = GET_MODE_CLASS (mode);
  switch (mclass)
    {
    case MODE_INT:
      /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
	 where we have them returning both SImode and DImode.  */
      if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
	PROMOTE_MODE (mode, dummy, valtype);
      /* FALLTHRU */

    case MODE_COMPLEX_INT:
    case MODE_VECTOR_INT:
      regnum = 0;
      break;

    case MODE_FLOAT:
      regnum = 32;
      break;

    case MODE_COMPLEX_FLOAT:
      {
	machine_mode cmode = GET_MODE_INNER (mode);

	return gen_rtx_PARALLEL
	  (VOIDmode,
	   gen_rtvec (2,
		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
					 const0_rtx),
		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
					 GEN_INT (GET_MODE_SIZE (cmode)))));
      }

    case MODE_RANDOM:
      /* We should only reach here for BLKmode on VMS.  */
      gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
      regnum = 0;
      break;

    default:
      gcc_unreachable ();
    }

  return gen_rtx_REG (mode, regnum);
}

/* Implement TARGET_FUNCTION_VALUE.  */

static rtx
alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
		      bool /*outgoing*/)
{
  return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
}

/* Implement TARGET_LIBCALL_VALUE.
*/

static rtx
alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
{
  return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.

   On the Alpha, $0 $1 and $f0 $f1 are the only registers thus used.  */

static bool
alpha_function_value_regno_p (const unsigned int regno)
{
  return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
}

/* TCmode complex values are passed by invisible reference.  We
   should not split these values.  */

static bool
alpha_split_complex_arg (const_tree type)
{
  return TYPE_MODE (type) != TCmode;
}

static tree
alpha_build_builtin_va_list (void)
{
  tree base, ofs, space, record, type_decl;

  if (TARGET_ABI_OPEN_VMS)
    return ptr_type_node;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;

  /* C++?  SET_IS_AGGR_TYPE (record, 1);  */

  /* Dummy field to prevent alignment warnings.  */
  space = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, NULL_TREE, integer_type_node);
  DECL_FIELD_CONTEXT (space) = record;
  DECL_ARTIFICIAL (space) = 1;
  DECL_IGNORED_P (space) = 1;

  ofs = build_decl (BUILTINS_LOCATION,
		    FIELD_DECL, get_identifier ("__offset"),
		    integer_type_node);
  DECL_FIELD_CONTEXT (ofs) = record;
  DECL_CHAIN (ofs) = space;

  base = build_decl (BUILTINS_LOCATION,
		     FIELD_DECL, get_identifier ("__base"),
		     ptr_type_node);
  DECL_FIELD_CONTEXT (base) = record;
  DECL_CHAIN (base) = ofs;

  TYPE_FIELDS (record) = base;
  layout_type (record);

  va_list_gpr_counter_field = ofs;
  return record;
}

#if TARGET_ABI_OSF
/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
   and constant additions.  */

static gimple *
va_list_skip_additions (tree lhs)
{
  gimple *stmt;

  for (;;)
    {
      enum tree_code code;

      stmt = SSA_NAME_DEF_STMT (lhs);

      if (gimple_code (stmt) == GIMPLE_PHI)
	return stmt;

      if (!is_gimple_assign (stmt)
	  || gimple_assign_lhs (stmt) != lhs)
	return NULL;

      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
	return stmt;
      code = gimple_assign_rhs_code (stmt);
      if (!CONVERT_EXPR_CODE_P (code)
	  && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
	      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
	      || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
	return stmt;

      lhs = gimple_assign_rhs1 (stmt);
    }
}

/* Check if LHS = RHS statement is
   LHS = *(ap.__base + ap.__offset + cst)
   or
   LHS = *(ap.__base
	   + ((ap.__offset + cst <= 47)
	      ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
   If the former, indicate that GPR registers are needed,
   if the latter, indicate that FPR registers are needed.

   Also look for LHS = (*ptr).field, where ptr is one of the forms
   listed above.

   On alpha, cfun->va_list_gpr_size is used as size of the needed
   regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
   registers are needed and bit 1 set if FPR registers are needed.
   Return true if va_list references should not be scanned for the
   current statement.
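   (The 48-byte bias in the second form selects the FP save area, which
   sits 48 bytes below the integer save area within the varargs register
   save block; see alpha_setup_incoming_varargs below.)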
*/ 5925 5926static bool 5927alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt) 5928{ 5929 tree base, offset, rhs; 5930 int offset_arg = 1; 5931 gimple *base_stmt; 5932 5933 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) 5934 != GIMPLE_SINGLE_RHS) 5935 return false; 5936 5937 rhs = gimple_assign_rhs1 (stmt); 5938 while (handled_component_p (rhs)) 5939 rhs = TREE_OPERAND (rhs, 0); 5940 if (TREE_CODE (rhs) != MEM_REF 5941 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) 5942 return false; 5943 5944 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); 5945 if (stmt == NULL 5946 || !is_gimple_assign (stmt) 5947 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) 5948 return false; 5949 5950 base = gimple_assign_rhs1 (stmt); 5951 if (TREE_CODE (base) == SSA_NAME) 5952 { 5953 base_stmt = va_list_skip_additions (base); 5954 if (base_stmt 5955 && is_gimple_assign (base_stmt) 5956 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) 5957 base = gimple_assign_rhs1 (base_stmt); 5958 } 5959 5960 if (TREE_CODE (base) != COMPONENT_REF 5961 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) 5962 { 5963 base = gimple_assign_rhs2 (stmt); 5964 if (TREE_CODE (base) == SSA_NAME) 5965 { 5966 base_stmt = va_list_skip_additions (base); 5967 if (base_stmt 5968 && is_gimple_assign (base_stmt) 5969 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) 5970 base = gimple_assign_rhs1 (base_stmt); 5971 } 5972 5973 if (TREE_CODE (base) != COMPONENT_REF 5974 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) 5975 return false; 5976 5977 offset_arg = 0; 5978 } 5979 5980 base = get_base_address (base); 5981 if (TREE_CODE (base) != VAR_DECL 5982 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) 5983 return false; 5984 5985 offset = gimple_op (stmt, 1 + offset_arg); 5986 if (TREE_CODE (offset) == SSA_NAME) 5987 { 5988 gimple *offset_stmt = va_list_skip_additions (offset); 5989 5990 if (offset_stmt 5991 && gimple_code (offset_stmt) == GIMPLE_PHI) 5992 { 5993 HOST_WIDE_INT sub; 5994 gimple *arg1_stmt, *arg2_stmt; 5995 tree arg1, arg2; 5996 enum tree_code code1, code2; 5997 5998 if (gimple_phi_num_args (offset_stmt) != 2) 5999 goto escapes; 6000 6001 arg1_stmt 6002 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); 6003 arg2_stmt 6004 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); 6005 if (arg1_stmt == NULL 6006 || !is_gimple_assign (arg1_stmt) 6007 || arg2_stmt == NULL 6008 || !is_gimple_assign (arg2_stmt)) 6009 goto escapes; 6010 6011 code1 = gimple_assign_rhs_code (arg1_stmt); 6012 code2 = gimple_assign_rhs_code (arg2_stmt); 6013 if (code1 == COMPONENT_REF 6014 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) 6015 /* Do nothing. 
*/;
	  else if (code2 == COMPONENT_REF
		   && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
	    {
	      std::swap (arg1_stmt, arg2_stmt);
	      code2 = code1;
	    }
	  else
	    goto escapes;

	  if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
	    goto escapes;

	  sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
	  if (code2 == MINUS_EXPR)
	    sub = -sub;
	  if (sub < -48 || sub > -32)
	    goto escapes;

	  arg1 = gimple_assign_rhs1 (arg1_stmt);
	  arg2 = gimple_assign_rhs1 (arg2_stmt);
	  if (TREE_CODE (arg2) == SSA_NAME)
	    {
	      arg2_stmt = va_list_skip_additions (arg2);
	      if (arg2_stmt == NULL
		  || !is_gimple_assign (arg2_stmt)
		  || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
		goto escapes;
	      arg2 = gimple_assign_rhs1 (arg2_stmt);
	    }
	  if (arg1 != arg2)
	    goto escapes;

	  if (TREE_CODE (arg1) != COMPONENT_REF
	      || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
	      || get_base_address (arg1) != base)
	    goto escapes;

	  /* Need floating point regs.  */
	  cfun->va_list_fpr_size |= 2;
	  return false;
	}
      if (offset_stmt
	  && is_gimple_assign (offset_stmt)
	  && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
	offset = gimple_assign_rhs1 (offset_stmt);
    }
  if (TREE_CODE (offset) != COMPONENT_REF
      || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
      || get_base_address (offset) != base)
    goto escapes;
  else
    /* Need general regs.  */
    cfun->va_list_fpr_size |= 1;
  return false;

escapes:
  si->va_list_escapes = true;
  return false;
}
#endif

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.  */

static void
alpha_setup_incoming_varargs (cumulative_args_t pcum,
			      const function_arg_info &arg,
			      int *pretend_size, int no_rtl)
{
  CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);

  /* Skip the current argument.  */
  targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg);

#if TARGET_ABI_OPEN_VMS
  /* For VMS, we allocate space for all 6 arg registers plus a count.

     However, if NO registers need to be saved, don't allocate any space.
     This is not only because we won't need the space, but because AP
     includes the current_pretend_args_size and we don't want to mess up
     any ap-relative addresses already made.  */
  if (cum.num_args < 6)
    {
      if (!no_rtl)
	{
	  emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
	  emit_insn (gen_arg_home ());
	}
      *pretend_size = 7 * UNITS_PER_WORD;
    }
#else
  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
     only push those that are remaining.  However, if NO registers need to
     be saved, don't allocate any space.  This is not only because we won't
     need the space, but because AP includes the current_pretend_args_size
     and we don't want to mess up any ap-relative addresses already made.

     If we are not to use the floating-point registers, save the integer
     registers where we would put the floating-point registers.  This is
     not the most efficient way to implement varargs with just one register
     class, but it isn't worth doing anything more efficient in this rare
     case.
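     The save block is laid out as six FP argument slots followed by the
     six integer argument slots, 48 bytes each; alpha_va_start below
     depends on this layout.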

/* Perform any actions needed for a function receiving a variable
   number of arguments.  */

static void
alpha_setup_incoming_varargs (cumulative_args_t pcum,
			      const function_arg_info &arg,
			      int *pretend_size, int no_rtl)
{
  CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);

  /* Skip the current argument.  */
  targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg);

#if TARGET_ABI_OPEN_VMS
  /* For VMS, we allocate space for all 6 arg registers plus a count.

     However, if NO registers need to be saved, don't allocate any space.
     This is not only because we won't need the space, but because AP
     includes the current_pretend_args_size and we don't want to mess up
     any ap-relative addresses already made.  */
  if (cum.num_args < 6)
    {
      if (!no_rtl)
	{
	  emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
	  emit_insn (gen_arg_home ());
	}
      *pretend_size = 7 * UNITS_PER_WORD;
    }
#else
  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
     only push those that remain.  However, if NO registers need to be
     saved, don't allocate any space.  This is not only because we won't
     need the space, but because AP includes the current_pretend_args_size
     and we don't want to mess up any ap-relative addresses already made.

     If we are not to use the floating-point registers, save the integer
     registers where we would put the floating-point registers.  This is
     not the most efficient way to implement varargs with just one register
     class, but it isn't worth doing anything more efficient in this rare
     case.  */
  if (cum >= 6)
    return;

  if (!no_rtl)
    {
      int count;
      alias_set_type set = get_varargs_alias_set ();
      rtx tmp;

      count = cfun->va_list_gpr_size / UNITS_PER_WORD;
      if (count > 6 - cum)
	count = 6 - cum;

      /* Detect whether integer registers or floating-point registers
	 are needed by the detected va_arg statements.  See above for
	 how these values are computed.  Note that the "escape" value
	 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
	 these bits set.  */
      gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);

      if (cfun->va_list_fpr_size & 1)
	{
	  tmp = gen_rtx_MEM (BLKmode,
			     plus_constant (Pmode, virtual_incoming_args_rtx,
					    (cum + 6) * UNITS_PER_WORD));
	  MEM_NOTRAP_P (tmp) = 1;
	  set_mem_alias_set (tmp, set);
	  move_block_from_reg (16 + cum, tmp, count);
	}

      if (cfun->va_list_fpr_size & 2)
	{
	  tmp = gen_rtx_MEM (BLKmode,
			     plus_constant (Pmode, virtual_incoming_args_rtx,
					    cum * UNITS_PER_WORD));
	  MEM_NOTRAP_P (tmp) = 1;
	  set_mem_alias_set (tmp, set);
	  move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
	}
    }
  *pretend_size = 12 * UNITS_PER_WORD;
#endif
}

static void
alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT offset;
  tree t, offset_field, base_field;

  if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
    return;

  /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
     up by 48, storing fp arg registers in the first 48 bytes, and the
     integer arg registers in the next 48 bytes.  This is only done,
     however, if any integer registers need to be stored.

     If no integer registers need be stored, then we must subtract 48
     in order to account for the integer arg registers which are counted
     in argsize above, but which are not actually stored on the stack.
     Must further be careful here about structures straddling the last
     integer argument register; that futzes with pretend_args_size,
     which changes the meaning of AP.  */

  if (NUM_ARGS < 6)
    offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
  else
    offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;

  if (TARGET_ABI_OPEN_VMS)
    {
      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
      t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
  else
    {
      base_field = TYPE_FIELDS (TREE_TYPE (valist));
      offset_field = DECL_CHAIN (base_field);

      base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
			   valist, base_field, NULL_TREE);
      offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
			     valist, offset_field, NULL_TREE);

      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
      t = fold_build_pointer_plus_hwi (t, offset);
      t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

      t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
      t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
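
/* Worked example (for illustration): for an OSF function with two
   named arguments, TARGET_SETUP_INCOMING_VARARGS above stores the
   remaining floating-point argument registers $f18-$f21 at AP+16
   through AP+47 and the integer registers $18-$21 at AP+64 through
   AP+95 (AP being the virtual incoming args pointer, and assuming
   cfun->va_list_gpr_size asks for all of them).  alpha_va_start then
   sets __base = AP + 48 and __offset = 16, so an integer va_arg reads
   from __base + 16 = AP + 64 ($18's slot), while a floating-point
   va_arg reads from __base + (16 - 48) = AP + 16 ($f18's slot).  */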

static tree
alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
			 gimple_seq *pre_p)
{
  tree type_size, ptr_type, addend, t, addr;
  gimple_seq internal_post;

  /* If the type could not be passed in registers, skip the block
     reserved for the registers.  */
  if (must_pass_va_arg_in_stack (type))
    {
      t = build_int_cst (TREE_TYPE (offset), 6*8);
      gimplify_assign (offset,
		       build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
		       pre_p);
    }

  addend = offset;
  ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);

  if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      tree real_part, imag_part, real_temp;

      real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
					   offset, pre_p);

      /* Copy the value into a new temporary, lest the formal temporary
	 be reused out from under us.  */
      real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);

      imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
					   offset, pre_p);

      return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
    }
  else if (TREE_CODE (type) == REAL_TYPE)
    {
      tree fpaddend, cond, fourtyeight;

      fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
      fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
			      addend, fourtyeight);
      cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
      addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
			    fpaddend, addend);
    }

  /* Build the final address and force that value into a temporary.  */
  addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
  internal_post = NULL;
  gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
  gimple_seq_add_seq (pre_p, internal_post);

  /* Update the offset field.  */
  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
  if (type_size == NULL || TREE_OVERFLOW (type_size))
    t = size_zero_node;
  else
    {
      t = size_binop (PLUS_EXPR, type_size, size_int (7));
      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
      t = size_binop (MULT_EXPR, t, size_int (8));
    }
  t = fold_convert (TREE_TYPE (offset), t);
  gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
		   pre_p);

  return build_va_arg_indirect_ref (addr);
}

static tree
alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  tree offset_field, base_field, offset, base, t, r;
  bool indirect;

  if (TARGET_ABI_OPEN_VMS)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  base_field = TYPE_FIELDS (va_list_type_node);
  offset_field = DECL_CHAIN (base_field);
  base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
		       valist, base_field, NULL_TREE);
  offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
			 valist, offset_field, NULL_TREE);

  /* Pull the fields of the structure out into temporaries.  Since we never
     modify the base field, we can use a formal temporary.  Sign-extend the
     offset field so that it's the proper width for pointer arithmetic.  */
  base = get_formal_tmp_var (base_field, pre_p);

  t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
  offset = get_initialized_tmp_var (t, pre_p, NULL);

  indirect = pass_va_arg_by_reference (type);

  if (indirect)
    {
      if (TREE_CODE (type) == COMPLEX_TYPE
	  && targetm.calls.split_complex_arg (type))
	{
	  tree real_part, imag_part, real_temp;

	  tree ptr_type = build_pointer_type_for_mode (TREE_TYPE (type),
						       ptr_mode, true);

	  real_part = alpha_gimplify_va_arg_1 (ptr_type, base,
					       offset, pre_p);
	  real_part = build_va_arg_indirect_ref (real_part);

	  /* Copy the value into a new temporary, lest the formal temporary
	     be reused out from under us.  */
	  real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);

	  imag_part = alpha_gimplify_va_arg_1 (ptr_type, base,
					       offset, pre_p);
	  imag_part = build_va_arg_indirect_ref (imag_part);

	  r = build2 (COMPLEX_EXPR, type, real_temp, imag_part);

	  /* Stuff the offset temporary back into its field.  */
	  gimplify_assign (unshare_expr (offset_field),
			   fold_convert (TREE_TYPE (offset_field), offset),
			   pre_p);
	  return r;
	}
      else
	type = build_pointer_type_for_mode (type, ptr_mode, true);
    }

  /* Find the value.  Note that this will be a stable indirection, or
     a composite of stable indirections in the case of complex.  */
  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);

  /* Stuff the offset temporary back into its field.  */
  gimplify_assign (unshare_expr (offset_field),
		   fold_convert (TREE_TYPE (offset_field), offset), pre_p);

  if (indirect)
    r = build_va_arg_indirect_ref (r);

  return r;
}
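
/* As a worked example of the offset update in alpha_gimplify_va_arg_1:
   TYPE_SIZE_UNIT of a 12-byte struct gives (12 + 7) / 8 * 8 = 16, so
   consuming such an argument advances __offset by 16, keeping the
   save-area cursor 8-byte aligned.  */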

/* Builtins.  */

enum alpha_builtin
{
  ALPHA_BUILTIN_CMPBGE,
  ALPHA_BUILTIN_EXTBL,
  ALPHA_BUILTIN_EXTWL,
  ALPHA_BUILTIN_EXTLL,
  ALPHA_BUILTIN_EXTQL,
  ALPHA_BUILTIN_EXTWH,
  ALPHA_BUILTIN_EXTLH,
  ALPHA_BUILTIN_EXTQH,
  ALPHA_BUILTIN_INSBL,
  ALPHA_BUILTIN_INSWL,
  ALPHA_BUILTIN_INSLL,
  ALPHA_BUILTIN_INSQL,
  ALPHA_BUILTIN_INSWH,
  ALPHA_BUILTIN_INSLH,
  ALPHA_BUILTIN_INSQH,
  ALPHA_BUILTIN_MSKBL,
  ALPHA_BUILTIN_MSKWL,
  ALPHA_BUILTIN_MSKLL,
  ALPHA_BUILTIN_MSKQL,
  ALPHA_BUILTIN_MSKWH,
  ALPHA_BUILTIN_MSKLH,
  ALPHA_BUILTIN_MSKQH,
  ALPHA_BUILTIN_UMULH,
  ALPHA_BUILTIN_ZAP,
  ALPHA_BUILTIN_ZAPNOT,
  ALPHA_BUILTIN_AMASK,
  ALPHA_BUILTIN_IMPLVER,
  ALPHA_BUILTIN_RPCC,
  ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
  ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,

  /* TARGET_MAX */
  ALPHA_BUILTIN_MINUB8,
  ALPHA_BUILTIN_MINSB8,
  ALPHA_BUILTIN_MINUW4,
  ALPHA_BUILTIN_MINSW4,
  ALPHA_BUILTIN_MAXUB8,
  ALPHA_BUILTIN_MAXSB8,
  ALPHA_BUILTIN_MAXUW4,
  ALPHA_BUILTIN_MAXSW4,
  ALPHA_BUILTIN_PERR,
  ALPHA_BUILTIN_PKLB,
  ALPHA_BUILTIN_PKWB,
  ALPHA_BUILTIN_UNPKBL,
  ALPHA_BUILTIN_UNPKBW,

  /* TARGET_CIX */
  ALPHA_BUILTIN_CTTZ,
  ALPHA_BUILTIN_CTLZ,
  ALPHA_BUILTIN_CTPOP,

  ALPHA_BUILTIN_max
};

static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
  CODE_FOR_builtin_cmpbge,
  CODE_FOR_extbl,
  CODE_FOR_extwl,
  CODE_FOR_extll,
  CODE_FOR_extql,
  CODE_FOR_extwh,
  CODE_FOR_extlh,
  CODE_FOR_extqh,
  CODE_FOR_builtin_insbl,
  CODE_FOR_builtin_inswl,
  CODE_FOR_builtin_insll,
  CODE_FOR_insql,
  CODE_FOR_inswh,
  CODE_FOR_inslh,
  CODE_FOR_insqh,
  CODE_FOR_mskbl,
  CODE_FOR_mskwl,
  CODE_FOR_mskll,
  CODE_FOR_mskql,
  CODE_FOR_mskwh,
  CODE_FOR_msklh,
  CODE_FOR_mskqh,
  CODE_FOR_umuldi3_highpart,
  CODE_FOR_builtin_zap,
  CODE_FOR_builtin_zapnot,
  CODE_FOR_builtin_amask,
  CODE_FOR_builtin_implver,
  CODE_FOR_builtin_rpcc,
  CODE_FOR_builtin_establish_vms_condition_handler,
  CODE_FOR_builtin_revert_vms_condition_handler,

  /* TARGET_MAX */
  CODE_FOR_builtin_minub8,
  CODE_FOR_builtin_minsb8,
  CODE_FOR_builtin_minuw4,
  CODE_FOR_builtin_minsw4,
  CODE_FOR_builtin_maxub8,
  CODE_FOR_builtin_maxsb8,
  CODE_FOR_builtin_maxuw4,
  CODE_FOR_builtin_maxsw4,
  CODE_FOR_builtin_perr,
  CODE_FOR_builtin_pklb,
  CODE_FOR_builtin_pkwb,
  CODE_FOR_builtin_unpkbl,
  CODE_FOR_builtin_unpkbw,

  /* TARGET_CIX */
  CODE_FOR_ctzdi2,
  CODE_FOR_clzdi2,
  CODE_FOR_popcountdi2
};

struct alpha_builtin_def
{
  const char *name;
  enum alpha_builtin code;
  unsigned int target_mask;
  bool is_const;
};

static struct alpha_builtin_def const zero_arg_builtins[] = {
  { "__builtin_alpha_implver",	ALPHA_BUILTIN_IMPLVER,	0, true },
  { "__builtin_alpha_rpcc",	ALPHA_BUILTIN_RPCC,	0, false }
};

static struct alpha_builtin_def const one_arg_builtins[] = {
  { "__builtin_alpha_amask",	ALPHA_BUILTIN_AMASK,	0, true },
  { "__builtin_alpha_pklb",	ALPHA_BUILTIN_PKLB,	MASK_MAX, true },
  { "__builtin_alpha_pkwb",	ALPHA_BUILTIN_PKWB,	MASK_MAX, true },
  { "__builtin_alpha_unpkbl",	ALPHA_BUILTIN_UNPKBL,	MASK_MAX, true },
  { "__builtin_alpha_unpkbw",	ALPHA_BUILTIN_UNPKBW,	MASK_MAX, true },
  { "__builtin_alpha_cttz",	ALPHA_BUILTIN_CTTZ,	MASK_CIX, true },
  { "__builtin_alpha_ctlz",	ALPHA_BUILTIN_CTLZ,	MASK_CIX, true },
  { "__builtin_alpha_ctpop",	ALPHA_BUILTIN_CTPOP,	MASK_CIX, true }
};

static struct alpha_builtin_def const two_arg_builtins[] = {
  { "__builtin_alpha_cmpbge",	ALPHA_BUILTIN_CMPBGE,	0, true },
  { "__builtin_alpha_extbl",	ALPHA_BUILTIN_EXTBL,	0, true },
  { "__builtin_alpha_extwl",	ALPHA_BUILTIN_EXTWL,	0, true },
  { "__builtin_alpha_extll",	ALPHA_BUILTIN_EXTLL,	0, true },
  { "__builtin_alpha_extql",	ALPHA_BUILTIN_EXTQL,	0, true },
  { "__builtin_alpha_extwh",	ALPHA_BUILTIN_EXTWH,	0, true },
  { "__builtin_alpha_extlh",	ALPHA_BUILTIN_EXTLH,	0, true },
  { "__builtin_alpha_extqh",	ALPHA_BUILTIN_EXTQH,	0, true },
  { "__builtin_alpha_insbl",	ALPHA_BUILTIN_INSBL,	0, true },
  { "__builtin_alpha_inswl",	ALPHA_BUILTIN_INSWL,	0, true },
  { "__builtin_alpha_insll",	ALPHA_BUILTIN_INSLL,	0, true },
  { "__builtin_alpha_insql",	ALPHA_BUILTIN_INSQL,	0, true },
  { "__builtin_alpha_inswh",	ALPHA_BUILTIN_INSWH,	0, true },
  { "__builtin_alpha_inslh",	ALPHA_BUILTIN_INSLH,	0, true },
  { "__builtin_alpha_insqh",	ALPHA_BUILTIN_INSQH,	0, true },
  { "__builtin_alpha_mskbl",	ALPHA_BUILTIN_MSKBL,	0, true },
  { "__builtin_alpha_mskwl",	ALPHA_BUILTIN_MSKWL,	0, true },
  { "__builtin_alpha_mskll",	ALPHA_BUILTIN_MSKLL,	0, true },
  { "__builtin_alpha_mskql",	ALPHA_BUILTIN_MSKQL,	0, true },
  { "__builtin_alpha_mskwh",	ALPHA_BUILTIN_MSKWH,	0, true },
  { "__builtin_alpha_msklh",	ALPHA_BUILTIN_MSKLH,	0, true },
  { "__builtin_alpha_mskqh",	ALPHA_BUILTIN_MSKQH,	0, true },
  { "__builtin_alpha_umulh",	ALPHA_BUILTIN_UMULH,	0, true },
  { "__builtin_alpha_zap",	ALPHA_BUILTIN_ZAP,	0, true },
  { "__builtin_alpha_zapnot",	ALPHA_BUILTIN_ZAPNOT,	0, true },
  { "__builtin_alpha_minub8",	ALPHA_BUILTIN_MINUB8,	MASK_MAX, true },
  { "__builtin_alpha_minsb8",	ALPHA_BUILTIN_MINSB8,	MASK_MAX, true },
  { "__builtin_alpha_minuw4",	ALPHA_BUILTIN_MINUW4,	MASK_MAX, true },
  { "__builtin_alpha_minsw4",	ALPHA_BUILTIN_MINSW4,	MASK_MAX, true },
  { "__builtin_alpha_maxub8",	ALPHA_BUILTIN_MAXUB8,	MASK_MAX, true },
  { "__builtin_alpha_maxsb8",	ALPHA_BUILTIN_MAXSB8,	MASK_MAX, true },
  { "__builtin_alpha_maxuw4",	ALPHA_BUILTIN_MAXUW4,	MASK_MAX, true },
  { "__builtin_alpha_maxsw4",	ALPHA_BUILTIN_MAXSW4,	MASK_MAX, true },
  { "__builtin_alpha_perr",	ALPHA_BUILTIN_PERR,	MASK_MAX, true }
};

static GTY(()) tree alpha_dimode_u;
static GTY(()) tree alpha_v8qi_u;
static GTY(()) tree alpha_v8qi_s;
static GTY(()) tree alpha_v4hi_u;
static GTY(()) tree alpha_v4hi_s;

static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];

/* Return the alpha builtin for CODE.  */

static tree
alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ALPHA_BUILTIN_max)
    return error_mark_node;
  return alpha_builtins[code];
}

/* Helper function of alpha_init_builtins.  Add the built-in specified
   by NAME, TYPE, CODE, and ECF.  */

static void
alpha_builtin_function (const char *name, tree ftype,
			enum alpha_builtin code, unsigned ecf)
{
  tree decl = add_builtin_function (name, ftype, (int) code,
				    BUILT_IN_MD, NULL, NULL_TREE);

  if (ecf & ECF_CONST)
    TREE_READONLY (decl) = 1;
  if (ecf & ECF_NOTHROW)
    TREE_NOTHROW (decl) = 1;

  alpha_builtins [(int) code] = decl;
}

/* Helper function of alpha_init_builtins.  Add the COUNT built-in
   functions pointed to by P, with function type FTYPE.  */

static void
alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
		    tree ftype)
{
  size_t i;

  for (i = 0; i < count; ++i, ++p)
    if ((target_flags & p->target_mask) == p->target_mask)
      alpha_builtin_function (p->name, ftype, p->code,
			      (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
}

static void
alpha_init_builtins (void)
{
  tree ftype;

  alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);

  ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);

  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);

  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
				    alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);

  if (TARGET_ABI_OPEN_VMS)
    {
      ftype = build_function_type_list (ptr_type_node, ptr_type_node,
					NULL_TREE);
      alpha_builtin_function ("__builtin_establish_vms_condition_handler",
			      ftype,
			      ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
			      0);

      ftype = build_function_type_list (ptr_type_node, void_type_node,
					NULL_TREE);
      alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
			      ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);

      vms_patch_builtins ();
    }
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
alpha_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      machine_mode mode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 2

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  tree arg;
  call_expr_arg_iterator iter;
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;

  if (fcode >= ALPHA_BUILTIN_max)
    internal_error ("bad builtin fcode");
  icode = code_for_builtin[fcode];
  if (icode == 0)
    internal_error ("bad builtin fcode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;

      if (arg == error_mark_node)
	return NULL_RTX;
      /* Use >= so that a call with more than MAX_ARGS arguments cannot
	 write past the end of OP below.  */
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      insn_op = &insn_data[icode].operand[arity + nonvoid];

      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
	op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
      arity++;
    }

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, op[0], op[1]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}

/* Fold the builtin for the CMPBGE instruction.  This is a vector comparison
   with an 8-bit output vector.  OPINT contains the integer operands; bit N
   of OP_CONST is set if OPINT[N] is valid.  */

static tree
alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
{
  if (op_const == 3)
    {
      int i, val;
      for (i = 0, val = 0; i < 8; ++i)
	{
	  unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
	  unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
	  if (c0 >= c1)
	    val |= 1 << i;
	}
      return build_int_cst (alpha_dimode_u, val);
    }
  else if (op_const == 2 && opint[1] == 0)
    return build_int_cst (alpha_dimode_u, 0xff);
  return NULL;
}
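
/* For example, __builtin_alpha_cmpbge (x, 0) folds to 0xff even when
   X is not constant, since every unsigned byte compares >= 0; that is
   the op_const == 2 case above.  */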

/* Fold the builtin for the ZAPNOT instruction.  This is essentially a
   specialized form of an AND operation.  Other byte manipulation instructions
   are defined in terms of this instruction, so this is also used as a
   subroutine for other builtins.

   OP contains the tree operands; OPINT contains the extracted integer values.
   Bit N of OP_CONST is set if OPINT[N] is valid.  OP may be null if only
   OPINT may be considered.  */

static tree
alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
			   long op_const)
{
  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT mask = 0;
      int i;

      for (i = 0; i < 8; ++i)
	if ((opint[1] >> i) & 1)
	  mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);

      if (op_const & 1)
	return build_int_cst (alpha_dimode_u, opint[0] & mask);

      if (op)
	return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
			    build_int_cst (alpha_dimode_u, mask));
    }
  else if ((op_const & 1) && opint[0] == 0)
    return build_int_cst (alpha_dimode_u, 0);
  return NULL;
}

/* Fold the builtins for the EXT family of instructions.  */

static tree
alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
			  long op_const, unsigned HOST_WIDE_INT bytemask,
			  bool is_high)
{
  long zap_const = 2;
  tree *zap_op = NULL;

  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT loc;

      loc = opint[1] & 7;
      loc *= BITS_PER_UNIT;

      if (loc != 0)
	{
	  if (op_const & 1)
	    {
	      unsigned HOST_WIDE_INT temp = opint[0];
	      if (is_high)
		temp <<= loc;
	      else
		temp >>= loc;
	      opint[0] = temp;
	      zap_const = 3;
	    }
	}
      else
	zap_op = op;
    }

  opint[1] = bytemask;
  return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
}

/* Fold the builtins for the INS family of instructions.  */

static tree
alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
			  long op_const, unsigned HOST_WIDE_INT bytemask,
			  bool is_high)
{
  if ((op_const & 1) && opint[0] == 0)
    return build_int_cst (alpha_dimode_u, 0);

  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT temp, loc, byteloc;
      tree *zap_op = NULL;

      loc = opint[1] & 7;
      bytemask <<= loc;

      temp = opint[0];
      if (is_high)
	{
	  byteloc = (64 - (loc * 8)) & 0x3f;
	  if (byteloc == 0)
	    zap_op = op;
	  else
	    temp >>= byteloc;
	  bytemask >>= 8;
	}
      else
	{
	  byteloc = loc * 8;
	  if (byteloc == 0)
	    zap_op = op;
	  else
	    temp <<= byteloc;
	}

      opint[0] = temp;
      opint[1] = bytemask;
      return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
    }

  return NULL;
}

static tree
alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
			  long op_const, unsigned HOST_WIDE_INT bytemask,
			  bool is_high)
{
  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT loc;

      loc = opint[1] & 7;
      bytemask <<= loc;

      if (is_high)
	bytemask >>= 8;

      opint[1] = bytemask ^ 0xff;
    }

  return alpha_fold_builtin_zapnot (op, opint, op_const);
}
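
/* These folders express the EXT, INS and MSK families in terms of
   ZAPNOT.  As an illustration, extwl (x, 2) extracts the 16-bit word
   at byte offset 2 and folds to zapnot (x >> 16, 0x03), i.e.
   (x >> 16) & 0xffff.  */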

static tree
alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
{
  tree op0 = fold_convert (vtype, op[0]);
  tree op1 = fold_convert (vtype, op[1]);
  tree val = fold_build2 (code, vtype, op0, op1);
  return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
}

static tree
alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp = 0;
  int i;

  if (op_const != 3)
    return NULL;

  for (i = 0; i < 8; ++i)
    {
      unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
      unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
      if (a >= b)
	temp += a - b;
      else
	temp += b - a;
    }

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] >> 24) & 0xff00;

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] >> 8) & 0xff00;
  temp |= (opint[0] >> 16) & 0xff0000;
  temp |= (opint[0] >> 24) & 0xff000000;

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] & 0xff00) << 24;

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] & 0x0000ff00) << 8;
  temp |= (opint[0] & 0x00ff0000) << 16;
  temp |= (opint[0] & 0xff000000) << 24;

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  if (opint[0] == 0)
    temp = 64;
  else
    temp = exact_log2 (opint[0] & -opint[0]);

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  if (opint[0] == 0)
    temp = 64;
  else
    temp = 64 - floor_log2 (opint[0]) - 1;

  return build_int_cst (alpha_dimode_u, temp);
}

static tree
alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp, op;

  if (op_const == 0)
    return NULL;

  op = opint[0];
  temp = 0;
  while (op)
    temp++, op &= op - 1;

  return build_int_cst (alpha_dimode_u, temp);
}
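
/* All of the helpers above fold only when the needed operands are
   compile-time constants; e.g. __builtin_alpha_ctpop (0xff) folds to
   8, and __builtin_alpha_cttz (0) folds to 64, matching the CIX
   instruction's behavior for a zero input.  */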

/* Fold one of our builtin functions.  */

static tree
alpha_fold_builtin (tree fndecl, int n_args, tree *op,
		    bool ignore ATTRIBUTE_UNUSED)
{
  unsigned HOST_WIDE_INT opint[MAX_ARGS];
  long op_const = 0;
  int i;

  if (n_args > MAX_ARGS)
    return NULL;

  for (i = 0; i < n_args; i++)
    {
      tree arg = op[i];
      if (arg == error_mark_node)
	return NULL;

      opint[i] = 0;
      if (TREE_CODE (arg) == INTEGER_CST)
	{
	  op_const |= 1L << i;
	  opint[i] = int_cst_value (arg);
	}
    }

  switch (DECL_MD_FUNCTION_CODE (fndecl))
    {
    case ALPHA_BUILTIN_CMPBGE:
      return alpha_fold_builtin_cmpbge (opint, op_const);

    case ALPHA_BUILTIN_EXTBL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
    case ALPHA_BUILTIN_EXTWL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
    case ALPHA_BUILTIN_EXTLL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
    case ALPHA_BUILTIN_EXTQL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
    case ALPHA_BUILTIN_EXTWH:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
    case ALPHA_BUILTIN_EXTLH:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
    case ALPHA_BUILTIN_EXTQH:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);

    case ALPHA_BUILTIN_INSBL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
    case ALPHA_BUILTIN_INSWL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
    case ALPHA_BUILTIN_INSLL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
    case ALPHA_BUILTIN_INSQL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
    case ALPHA_BUILTIN_INSWH:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
    case ALPHA_BUILTIN_INSLH:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
    case ALPHA_BUILTIN_INSQH:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);

    case ALPHA_BUILTIN_MSKBL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
    case ALPHA_BUILTIN_MSKWL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
    case ALPHA_BUILTIN_MSKLL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
    case ALPHA_BUILTIN_MSKQL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
    case ALPHA_BUILTIN_MSKWH:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
    case ALPHA_BUILTIN_MSKLH:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
    case ALPHA_BUILTIN_MSKQH:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);

    case ALPHA_BUILTIN_ZAP:
      opint[1] ^= 0xff;
      /* FALLTHRU */
    case ALPHA_BUILTIN_ZAPNOT:
      return alpha_fold_builtin_zapnot (op, opint, op_const);

    case ALPHA_BUILTIN_MINUB8:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
    case ALPHA_BUILTIN_MINSB8:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
    case ALPHA_BUILTIN_MINUW4:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
    case ALPHA_BUILTIN_MINSW4:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
    case ALPHA_BUILTIN_MAXUB8:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
    case ALPHA_BUILTIN_MAXSB8:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
    case ALPHA_BUILTIN_MAXUW4:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
    case ALPHA_BUILTIN_MAXSW4:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);

    case ALPHA_BUILTIN_PERR:
      return alpha_fold_builtin_perr (opint, op_const);
    case ALPHA_BUILTIN_PKLB:
      return alpha_fold_builtin_pklb (opint, op_const);
    case ALPHA_BUILTIN_PKWB:
      return alpha_fold_builtin_pkwb (opint, op_const);
    case ALPHA_BUILTIN_UNPKBL:
      return alpha_fold_builtin_unpkbl (opint, op_const);
    case ALPHA_BUILTIN_UNPKBW:
      return alpha_fold_builtin_unpkbw (opint, op_const);

    case ALPHA_BUILTIN_CTTZ:
      return alpha_fold_builtin_cttz (opint, op_const);
    case ALPHA_BUILTIN_CTLZ:
      return alpha_fold_builtin_ctlz (opint, op_const);
    case ALPHA_BUILTIN_CTPOP:
      return alpha_fold_builtin_ctpop (opint, op_const);

    case ALPHA_BUILTIN_AMASK:
    case ALPHA_BUILTIN_IMPLVER:
    case ALPHA_BUILTIN_RPCC:
      /* None of these are foldable at compile-time.  */
    default:
      return NULL;
    }
}

bool
alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  bool changed = false;
  gimple *stmt = gsi_stmt (*gsi);
  tree call = gimple_call_fn (stmt);
  gimple *new_stmt = NULL;

  if (call)
    {
      tree fndecl = gimple_call_fndecl (stmt);

      if (fndecl)
	{
	  tree arg0, arg1;

	  switch (DECL_MD_FUNCTION_CODE (fndecl))
	    {
	    case ALPHA_BUILTIN_UMULH:
	      arg0 = gimple_call_arg (stmt, 0);
	      arg1 = gimple_call_arg (stmt, 1);

	      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
					      MULT_HIGHPART_EXPR, arg0, arg1);
	      break;
	    default:
	      break;
	    }
	}
    }

  if (new_stmt)
    {
      gsi_replace (gsi, new_stmt, true);
      changed = true;
    }

  return changed;
}
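
/* For illustration, the fold above turns

     _5 = __builtin_alpha_umulh (a_1, b_2);

   into

     _5 = a_1 h* b_2;

   i.e. a MULT_HIGHPART_EXPR, which the rest of the middle end can
   value-number and simplify like ordinary arithmetic.  */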

/* This page contains routines that are used to determine what the function
   prologue and epilogue code will do and write them out.  */

/* Compute the size of the save area in the stack.  */

/* These variables are used for communication between the following functions.
   They indicate various things about the current function being compiled
   that are used to tell what kind of prologue, epilogue and procedure
   descriptor to generate.  */

/* Nonzero if we need a stack procedure.  */
enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
static enum alpha_procedure_types alpha_procedure_type;

/* Register number (either FP or SP) that is used to unwind the frame.  */
static int vms_unwind_regno;

/* Register number used to save FP.  We need not have one for RA since
   we don't modify it for register procedures.  This is only defined
   for register frame procedures.  */
static int vms_save_fp_regno;

/* Register number used to reference objects off our PV.  */
static int vms_base_regno;

/* Compute register masks for saved registers, register save area size,
   and total frame size.  */
static void
alpha_compute_frame_layout (void)
{
  unsigned HOST_WIDE_INT sa_mask = 0;
  HOST_WIDE_INT frame_size;
  int sa_size;

  /* When outputting a thunk, we don't have valid register life info,
     but assemble_start_function wants to output .frame and .mask
     directives.  */
  if (!cfun->is_thunk)
    {
      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
	sa_mask |= HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM;

      /* One for every register we have to save.  */
      for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (! call_used_or_fixed_reg_p (i)
	    && df_regs_ever_live_p (i) && i != REG_RA)
	  sa_mask |= HOST_WIDE_INT_1U << i;

      /* We need to restore these for the handler.  */
      if (crtl->calls_eh_return)
	{
	  for (unsigned i = 0; ; ++i)
	    {
	      unsigned regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;
	      sa_mask |= HOST_WIDE_INT_1U << regno;
	    }
	}

      /* If any register spilled, then spill the return address also.  */
      /* ??? This is required by the Digital stack unwind specification
	 and isn't needed if we're doing Dwarf2 unwinding.  */
      if (sa_mask || alpha_ra_ever_killed ())
	sa_mask |= HOST_WIDE_INT_1U << REG_RA;
    }

  sa_size = popcount_hwi (sa_mask);
  frame_size = get_frame_size ();

  if (TARGET_ABI_OPEN_VMS)
    {
      /* Start with a stack procedure if we make any calls (REG_RA used), or
	 need a frame pointer, with a register procedure if we otherwise need
	 at least a slot, and with a null procedure in other cases.  */
      if ((sa_mask >> REG_RA) & 1 || frame_pointer_needed)
	alpha_procedure_type = PT_STACK;
      else if (frame_size != 0)
	alpha_procedure_type = PT_REGISTER;
      else
	alpha_procedure_type = PT_NULL;

      /* Don't reserve space for saving FP & RA yet.  Do that later after we've
	 made the final decision on stack procedure vs register procedure.  */
      if (alpha_procedure_type == PT_STACK)
	sa_size -= 2;

      /* Decide whether to refer to objects off our PV via FP or PV.
	 If we need FP for something else or if we receive a nonlocal
	 goto (which expects PV to contain the value), we must use PV.
	 Otherwise, start by assuming we can use FP.  */

      vms_base_regno
	= (frame_pointer_needed
	   || cfun->has_nonlocal_label
	   || alpha_procedure_type == PT_STACK
	   || crtl->outgoing_args_size)
	  ? REG_PV : HARD_FRAME_POINTER_REGNUM;

      /* If we want to copy PV into FP, we need to find some register
	 in which to save FP.  */
      vms_save_fp_regno = -1;
      if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
	for (unsigned i = 0; i < 32; i++)
	  if (! fixed_regs[i] && call_used_or_fixed_reg_p (i)
	      && ! df_regs_ever_live_p (i))
	    {
	      vms_save_fp_regno = i;
	      break;
	    }

      /* A VMS condition handler requires a stack procedure in our
	 implementation (it is not required by the calling standard).  */
      if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
	  || cfun->machine->uses_condition_handler)
	vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
      else if (alpha_procedure_type == PT_NULL)
	vms_base_regno = REG_PV;

      /* Stack unwinding should be done via FP unless we use it for PV.  */
      vms_unwind_regno = (vms_base_regno == REG_PV
			  ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

      /* If this is a stack procedure, allow space for saving FP, RA and
	 a condition handler slot if needed.  */
      if (alpha_procedure_type == PT_STACK)
	sa_size += 2 + cfun->machine->uses_condition_handler;
    }
  else
    {
      /* Our size must be even (multiple of 16 bytes).  */
      if (sa_size & 1)
	sa_size++;
    }
  sa_size *= 8;

  if (TARGET_ABI_OPEN_VMS)
    frame_size = ALPHA_ROUND (sa_size
			      + (alpha_procedure_type == PT_STACK ? 8 : 0)
			      + frame_size
			      + crtl->args.pretend_args_size);
  else
    frame_size = (ALPHA_ROUND (crtl->outgoing_args_size)
		  + sa_size
		  + ALPHA_ROUND (frame_size + crtl->args.pretend_args_size));

  cfun->machine->sa_mask = sa_mask;
  cfun->machine->sa_size = sa_size;
  cfun->machine->frame_size = frame_size;
}

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT alpha_compute_frame_layout

/* Return 1 if this function can directly return via $26.  */

bool
direct_return (void)
{
  return (TARGET_ABI_OSF
	  && reload_completed
	  && cfun->machine->frame_size == 0);
}

/* Define the offset between two registers, one to be eliminated,
   and the other its replacement, at the start of a routine.  */

HOST_WIDE_INT
alpha_initial_elimination_offset (unsigned int from,
				  unsigned int to ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT ret;

  ret = cfun->machine->sa_size;
  ret += ALPHA_ROUND (crtl->outgoing_args_size);

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      break;

    case ARG_POINTER_REGNUM:
      ret += (ALPHA_ROUND (get_frame_size ()
			   + crtl->args.pretend_args_size)
	      - crtl->args.pretend_args_size);
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
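
/* Worked example (illustrative): for an OSF function with two saved
   registers (sa_size == 16 bytes), no outgoing argument area, a
   32-byte frame and no pretend args, eliminating FRAME_POINTER_REGNUM
   yields an offset of 16, while ARG_POINTER_REGNUM yields
   16 + ALPHA_ROUND (32) = 48.  */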

#if TARGET_ABI_OPEN_VMS

/* Worker function for TARGET_CAN_ELIMINATE.  */

static bool
alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  switch (alpha_procedure_type)
    {
    case PT_NULL:
      /* NULL procedures have no frame of their own and we only
	 know how to resolve from the current stack pointer.  */
      return to == STACK_POINTER_REGNUM;

    case PT_REGISTER:
    case PT_STACK:
      /* We always eliminate except to the stack pointer if there is no
	 usable frame pointer at hand.  */
      return (to != STACK_POINTER_REGNUM
	      || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
    }

  gcc_unreachable ();
}

/* FROM is to be eliminated for TO.  Return the offset so that TO+offset
   designates the same location as FROM.  */

HOST_WIDE_INT
alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
{
  /* The only possible attempts we ever expect are ARG or FRAME_PTR to
     HARD_FRAME or STACK_PTR.  We need the alpha_procedure_type to decide
     on the proper computations and will need the register save area size
     in most cases.  */

  HOST_WIDE_INT sa_size = cfun->machine->sa_size;

  /* PT_NULL procedures have no frame of their own and we only allow
     elimination to the stack pointer.  This is the argument pointer and we
     resolve the soft frame pointer to that as well.  */

  if (alpha_procedure_type == PT_NULL)
    return 0;

  /* For a PT_STACK procedure the frame layout looks as follows

		      -----> decreasing addresses

		   <             size rounded up to 16       |   likewise   >
     --------------#------------------------------+++--------------+++-------#
     incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
     --------------#---------------------------------------------------------#
		   ^                               ^                 ^        ^
		ARG_PTR                        FRAME_PTR     HARD_FRAME_PTR  STACK_PTR

     PT_REGISTER procedures are similar in that they may have a frame of their
     own.  They have no regs-sa/pv/outgoing-args area.

     We first compute offset to HARD_FRAME_PTR, then add what we need to get
     to STACK_PTR if need be.  */

  {
    HOST_WIDE_INT offset;
    HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;

    switch (from)
      {
      case FRAME_POINTER_REGNUM:
	offset = ALPHA_ROUND (sa_size + pv_save_size);
	break;
      case ARG_POINTER_REGNUM:
	offset = (ALPHA_ROUND (sa_size + pv_save_size
			       + get_frame_size ()
			       + crtl->args.pretend_args_size)
		  - crtl->args.pretend_args_size);
	break;
      default:
	gcc_unreachable ();
      }

    if (to == STACK_POINTER_REGNUM)
      offset += ALPHA_ROUND (crtl->outgoing_args_size);

    return offset;
  }
}

#define COMMON_OBJECT "common_object"

static tree
common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
		       tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
		       bool *no_add_attrs ATTRIBUTE_UNUSED)
{
  tree decl = *node;
  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  return NULL_TREE;
}

static const struct attribute_spec vms_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { COMMON_OBJECT, 0, 1, true, false, false, false, common_object_handler,
    NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

void
vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				unsigned HOST_WIDE_INT size,
				unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);
  fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
  /* ??? Unlike on OSF/1, the alignment factor is not in log units.  */
  fprintf (file, ",%u", align / BITS_PER_UNIT);
  if (attr)
    {
      attr = lookup_attribute (COMMON_OBJECT, attr);
      if (attr)
	fprintf (file, ",%s",
		 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
    }
  fputc ('\n', file);
}

#undef COMMON_OBJECT

#endif

bool
alpha_find_lo_sum_using_gp (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
	return true;
    }
  return false;
}

static int
alpha_does_function_need_gp (void)
{
  rtx_insn *insn;

  /* The GP being variable is an OSF abi thing.  */
  if (! TARGET_ABI_OSF)
    return 0;

  /* We need the gp to load the address of __mcount.  */
  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
    return 1;

  /* The code emitted by alpha_output_mi_thunk_osf uses the gp.  */
  if (cfun->is_thunk)
    return 1;

  /* The nonlocal receiver pattern assumes that the gp is valid for
     the nested function.  Reasonable because it's almost always set
     correctly already.  For the cases where that's wrong, make sure
     the nested function loads its gp on entry.  */
  if (crtl->has_nonlocal_goto)
    return 1;

  /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
     Even if we are a static function, we still need to do this in case
     our address is taken and passed to something like qsort.  */

  push_topmost_sequence ();
  insn = get_insns ();
  pop_topmost_sequence ();

  for (; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& GET_CODE (PATTERN (insn)) != USE
	&& GET_CODE (PATTERN (insn)) != CLOBBER
	&& get_attr_usegp (insn))
      return 1;

  return 0;
}

/* Helper function for alpha_store_data_bypass_p, handle just a single SET
   IN_SET.  */

static bool
alpha_store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set)
{
  if (!MEM_P (SET_DEST (in_set)))
    return false;

  rtx out_set = single_set (out_insn);
  if (out_set)
    return !reg_mentioned_p (SET_DEST (out_set), SET_DEST (in_set));

  rtx out_pat = PATTERN (out_insn);
  if (GET_CODE (out_pat) != PARALLEL)
    return false;

  for (int i = 0; i < XVECLEN (out_pat, 0); i++)
    {
      rtx out_exp = XVECEXP (out_pat, 0, i);

      if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE
	  || GET_CODE (out_exp) == TRAP_IF)
	continue;

      gcc_assert (GET_CODE (out_exp) == SET);

      if (reg_mentioned_p (SET_DEST (out_exp), SET_DEST (in_set)))
	return false;
    }

  return true;
}

/* True if the dependency between OUT_INSN and IN_INSN is on the store
   data not the address operand(s) of the store.  IN_INSN and OUT_INSN
   must be either a single_set or a PARALLEL with SETs inside.

   This alpha-specific version of store_data_bypass_p ignores TRAP_IF
   that would result in assertion failure (and internal compiler error)
   in the generic store_data_bypass_p function.  */

int
alpha_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx in_set = single_set (in_insn);
  if (in_set)
    return alpha_store_data_bypass_p_1 (out_insn, in_set);

  rtx in_pat = PATTERN (in_insn);
  if (GET_CODE (in_pat) != PARALLEL)
    return false;

  for (int i = 0; i < XVECLEN (in_pat, 0); i++)
    {
      rtx in_exp = XVECEXP (in_pat, 0, i);

      if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE
	  || GET_CODE (in_exp) == TRAP_IF)
	continue;

      gcc_assert (GET_CODE (in_exp) == SET);

      if (!alpha_store_data_bypass_p_1 (out_insn, in_exp))
	return false;
    }

  return true;
}
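
/* To illustrate the distinction: for a store (set (mem (reg A)) (reg B)),
   a producer that sets reg B feeds only the store data, so the bypass
   applies; a producer that sets reg A feeds the address calculation,
   which reg_mentioned_p detects, so the bypass does not apply.  */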

/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
   sequences.  */

static rtx_insn *
set_frame_related_p (void)
{
  rtx_insn *seq = get_insns ();
  rtx_insn *insn;

  end_sequence ();

  if (!seq)
    return NULL;

  if (INSN_P (seq))
    {
      insn = seq;
      while (insn != NULL_RTX)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = NEXT_INSN (insn);
	}
      seq = emit_insn (seq);
    }
  else
    {
      seq = emit_insn (seq);
      RTX_FRAME_RELATED_P (seq) = 1;
    }
  return seq;
}

#define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())

/* Generates a store with the proper unwind info attached.  VALUE is
   stored at BASE_REG+BASE_OFS.  If FRAME_BIAS is nonzero, then BASE_REG
   contains SP+FRAME_BIAS, and that is the unwind info that should be
   generated.  If FRAME_REG != VALUE, then VALUE is being stored on
   behalf of FRAME_REG, and FRAME_REG should be present in the unwind.  */

static void
emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
		    HOST_WIDE_INT base_ofs, rtx frame_reg)
{
  rtx addr, mem;
  rtx_insn *insn;

  addr = plus_constant (Pmode, base_reg, base_ofs);
  mem = gen_frame_mem (DImode, addr);

  insn = emit_move_insn (mem, value);
  RTX_FRAME_RELATED_P (insn) = 1;

  if (frame_bias || value != frame_reg)
    {
      if (frame_bias)
	{
	  addr = plus_constant (Pmode, stack_pointer_rtx,
				frame_bias + base_ofs);
	  mem = gen_rtx_MEM (DImode, addr);
	}

      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		    gen_rtx_SET (mem, frame_reg));
    }
}

static void
emit_frame_store (unsigned int regno, rtx base_reg,
		  HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
{
  rtx reg = gen_rtx_REG (DImode, regno);
  emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
}

/* Write function prologue.  */

/* On vms we have two kinds of functions:

   - stack frame (PROC_STACK)
	these are 'normal' functions with local vars and which are
	calling other functions
   - register frame (PROC_REGISTER)
	keeps all data in registers, needs no stack

   We must pass this to the assembler so it can generate the
   proper pdsc (procedure descriptor)
   This is done with the '.pdesc' command.

   On non-VMS targets, we don't really differentiate between the two, as
   we can simply allocate stack without saving registers.  */

void
alpha_expand_prologue (void)
{
  /* Registers to save.  */
  unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
  /* Stack space needed for pushing registers clobbered by us.  */
  HOST_WIDE_INT sa_size = cfun->machine->sa_size;
  /* Complete stack size needed.  */
  HOST_WIDE_INT frame_size = cfun->machine->frame_size;
  /* Probed stack size; it additionally includes the size of
     the "reserve region" if any.  */
  HOST_WIDE_INT probed_size, sa_bias;
  /* Offset from base reg to register save area.  */
  HOST_WIDE_INT reg_offset;
  rtx sa_reg;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  if (TARGET_ABI_OPEN_VMS)
    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
  else
    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);

  /* Emit an insn to reload GP, if needed.  */
  if (TARGET_ABI_OSF)
    {
      alpha_function_needs_gp = alpha_does_function_need_gp ();
      if (alpha_function_needs_gp)
	emit_insn (gen_prologue_ldgp ());
    }

  /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
     the call to mcount ourselves, rather than having the linker do it
     magically in response to -pg.  Since _mcount has special linkage,
     don't represent the call as a call.  */
  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
    emit_insn (gen_prologue_mcount ());

  /* Adjust the stack by the frame size.  If the frame size is > 4096
     bytes, we need to be sure we probe somewhere in the first and last
     4096 bytes (we can probably get away without the latter test) and
     every 8192 bytes in between.  If the frame size is > 32768, we
     do this in a loop.  Otherwise, we generate the explicit probe
     instructions.

     Note that we are only allowed to adjust sp once in the prologue.  */

  probed_size = frame_size;
  if (flag_stack_check || flag_stack_clash_protection)
    probed_size += get_stack_check_protect ();

  if (probed_size <= 32768)
    {
      if (probed_size > 4096)
	{
	  int probed;

	  for (probed = 4096; probed < probed_size; probed += 8192)
	    emit_insn (gen_stack_probe_internal (GEN_INT (-probed)));

	  /* We only have to do this probe if we aren't saving registers or
	     if we are probing beyond the frame because of -fstack-check.  */
	  if ((sa_size == 0 && probed_size > probed - 4096)
	      || flag_stack_check || flag_stack_clash_protection)
	    emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size)));
	}

      if (frame_size != 0)
	FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    GEN_INT (-frame_size))));
    }
  else
    {
      /* Here we generate code to set R22 to SP + 4096 and set R23 to the
	 number of 8192 byte blocks to probe.  We then probe each block
	 in the loop and then set SP to the proper location.  If the
	 amount remaining is > 4096, we have to do one more probe if we
	 are not saving any registers or if we are probing beyond the
	 frame because of -fstack-check.  */

      HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
      HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
      rtx ptr = gen_rtx_REG (DImode, 22);
      rtx count = gen_rtx_REG (DImode, 23);
      rtx seq;

      emit_move_insn (count, GEN_INT (blocks));
      emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));

      /* Because of the difficulty in emitting a new basic block this
	 late in the compilation, generate the loop as a single insn.  */
      emit_insn (gen_prologue_stack_probe_loop (count, ptr));
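
      /* Worked example (for illustration): with frame_size == 100000
	 and no stack-checking flags, probed_size is 100000, so
	 blocks == (100000 + 4096) / 8192 == 12 and
	 leftover == 104096 - 12*8192 == 5792.  The loop probes the
	 twelve 8192-byte blocks below SP+4096, and since
	 leftover > 4096 one extra probe is emitted below when no
	 registers are being saved.  */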

      if ((leftover > 4096 && sa_size == 0)
	  || flag_stack_check || flag_stack_clash_protection)
	{
	  rtx last = gen_rtx_MEM (DImode,
				  plus_constant (Pmode, ptr, -leftover));
	  MEM_VOLATILE_P (last) = 1;
	  emit_move_insn (last, const0_rtx);
	}

      if (flag_stack_check || flag_stack_clash_protection)
	{
	  /* If -fstack-check is specified we have to load the entire
	     constant into a register and subtract from the sp in one go,
	     because the probed stack size is not equal to the frame size.  */
	  HOST_WIDE_INT lo, hi;
	  lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
	  hi = frame_size - lo;

	  emit_move_insn (ptr, GEN_INT (hi));
	  emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
	  seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
				       ptr));
	}
      else
	{
	  seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
				       GEN_INT (-leftover)));
	}

      /* This alternative is special, because the DWARF code cannot
	 possibly intuit through the loop above.  So we invent this
	 note it looks at instead.  */
      RTX_FRAME_RELATED_P (seq) = 1;
      add_reg_note (seq, REG_FRAME_RELATED_EXPR,
		    gen_rtx_SET (stack_pointer_rtx,
				 plus_constant (Pmode, stack_pointer_rtx,
						-frame_size)));
    }

  /* Cope with very large offsets to the register save area.  */
  sa_bias = 0;
  sa_reg = stack_pointer_rtx;
  if (reg_offset + sa_size > 0x8000)
    {
      int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
      rtx sa_bias_rtx;

      if (low + sa_size <= 0x8000)
	sa_bias = reg_offset - low, reg_offset = low;
      else
	sa_bias = reg_offset, reg_offset = 0;

      sa_reg = gen_rtx_REG (DImode, 24);
      sa_bias_rtx = GEN_INT (sa_bias);

      if (add_operand (sa_bias_rtx, DImode))
	emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
      else
	{
	  emit_move_insn (sa_reg, sa_bias_rtx);
	  emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
	}
    }

  /* Save regs in stack order.  Beginning with VMS PV.  */
  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
    emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);

  /* Save register RA next, followed by any other registers
     that need to be saved.  */
  for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask))
    {
      emit_frame_store (i, sa_reg, sa_bias, reg_offset);
      reg_offset += 8;
      sa_mask &= ~(HOST_WIDE_INT_1U << i);
    }
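
  /* Note that the loop above visits REG_RA first by construction: a
     nonzero SA_MASK always includes REG_RA (see
     alpha_compute_frame_layout), and after each stored bit is cleared
     ctz_hwi selects the remaining registers in increasing register
     number.  */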
7935 7936 Also, note that 7937 frame_pointer_needed 7938 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM 7939 and 7940 crtl->outgoing_args_size != 0 7941 => alpha_procedure_type != PT_NULL, 7942 7943 so when we are not setting the bit here, we are guaranteed to 7944 have emitted an FRP frame pointer update just before. */ 7945 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed; 7946 } 7947 } 7948 else 7949 { 7950 /* If we need a frame pointer, set it from the stack pointer. */ 7951 if (frame_pointer_needed) 7952 { 7953 if (TARGET_CAN_FAULT_IN_PROLOGUE) 7954 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); 7955 else 7956 /* This must always be the last instruction in the 7957 prologue, thus we emit a special move + clobber. */ 7958 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx, 7959 stack_pointer_rtx, sa_reg))); 7960 } 7961 } 7962 7963 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into 7964 the prologue, for exception handling reasons, we cannot do this for 7965 any insn that might fault. We could prevent this for mems with a 7966 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we 7967 have to prevent all such scheduling with a blockage. 7968 7969 Linux, on the other hand, never bothered to implement OSF/1's 7970 exception handling, and so doesn't care about such things. Anyone 7971 planning to use dwarf2 frame-unwind info can also omit the blockage. */ 7972 7973 if (! TARGET_CAN_FAULT_IN_PROLOGUE) 7974 emit_insn (gen_blockage ()); 7975} 7976 7977/* Count the number of .file directives, so that .loc is up to date. */ 7978int num_source_filenames = 0; 7979 7980/* Output the textual info surrounding the prologue. */ 7981 7982void 7983alpha_start_function (FILE *file, const char *fnname, 7984 tree decl ATTRIBUTE_UNUSED) 7985{ 7986 unsigned long imask, fmask; 7987 /* Complete stack size needed. */ 7988 HOST_WIDE_INT frame_size = cfun->machine->frame_size; 7989 /* The maximum debuggable frame size. */ 7990 const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31; 7991 /* Offset from base reg to register save area. */ 7992 HOST_WIDE_INT reg_offset; 7993 char *entry_label = (char *) alloca (strlen (fnname) + 6); 7994 char *tramp_label = (char *) alloca (strlen (fnname) + 6); 7995 int i; 7996 7997#if TARGET_ABI_OPEN_VMS 7998 vms_start_function (fnname); 7999#endif 8000 8001 alpha_fnname = fnname; 8002 8003 if (TARGET_ABI_OPEN_VMS) 8004 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 8005 else 8006 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 8007 8008 imask = cfun->machine->sa_mask & 0xffffffffu; 8009 fmask = cfun->machine->sa_mask >> 32; 8010 8011 /* Issue function start and label. */ 8012 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive) 8013 { 8014 fputs ("\t.ent ", file); 8015 assemble_name (file, fnname); 8016 putc ('\n', file); 8017 8018 /* If the function needs GP, we'll write the "..ng" label there. 8019 Otherwise, do it here. */ 8020 if (TARGET_ABI_OSF 8021 && ! alpha_function_needs_gp 8022 && ! cfun->is_thunk) 8023 { 8024 putc ('$', file); 8025 assemble_name (file, fnname); 8026 fputs ("..ng:\n", file); 8027 } 8028 } 8029 /* Nested functions on VMS that are potentially called via trampoline 8030 get a special transfer entry point that loads the called functions 8031 procedure descriptor and static chain. 
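Concretely: the ..tr entry emitted below assumes $27 (the procedure value)
points at the descriptor the trampoline built, reloads the static chain into
$1 from offset 24 and the real procedure value into $27 from offset 16, then
falls through to the normal ..en entry point. The offsets here are simply
those used by the two ldq instructions that follow.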
*/ 8032 if (TARGET_ABI_OPEN_VMS 8033 && !TREE_PUBLIC (decl) 8034 && DECL_CONTEXT (decl) 8035 && !TYPE_P (DECL_CONTEXT (decl)) 8036 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL) 8037 { 8038 strcpy (tramp_label, fnname); 8039 strcat (tramp_label, "..tr"); 8040 ASM_OUTPUT_LABEL (file, tramp_label); 8041 fprintf (file, "\tldq $1,24($27)\n"); 8042 fprintf (file, "\tldq $27,16($27)\n"); 8043 } 8044 8045 strcpy (entry_label, fnname); 8046 if (TARGET_ABI_OPEN_VMS) 8047 strcat (entry_label, "..en"); 8048 8049 ASM_OUTPUT_LABEL (file, entry_label); 8050 inside_function = TRUE; 8051 8052 if (TARGET_ABI_OPEN_VMS) 8053 fprintf (file, "\t.base $%d\n", vms_base_regno); 8054 8055 if (TARGET_ABI_OSF 8056 && TARGET_IEEE_CONFORMANT 8057 && !flag_inhibit_size_directive) 8058 { 8059 /* Set flags in procedure descriptor to request IEEE-conformant 8060 math-library routines. The value we set it to is PDSC_EXC_IEEE 8061 (/usr/include/pdsc.h). */ 8062 fputs ("\t.eflag 48\n", file); 8063 } 8064 8065 /* Set up offsets to alpha virtual arg/local debugging pointer. */ 8066 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size; 8067 alpha_arg_offset = -frame_size + 48; 8068 8069 /* Describe our frame. If the frame size is larger than an integer, 8070 print it as zero to avoid an assembler error. We won't be 8071 properly describing such a frame, but that's the best we can do. */ 8072 if (TARGET_ABI_OPEN_VMS) 8073 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26," 8074 HOST_WIDE_INT_PRINT_DEC "\n", 8075 vms_unwind_regno, 8076 frame_size >= max_frame_size ? 0 : frame_size, 8077 reg_offset); 8078 else if (!flag_inhibit_size_directive) 8079 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", 8080 (frame_pointer_needed 8081 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM), 8082 frame_size >= max_frame_size ? 0 : frame_size, 8083 crtl->args.pretend_args_size); 8084 8085 /* Describe which registers were spilled. */ 8086 if (TARGET_ABI_OPEN_VMS) 8087 { 8088 if (imask) 8089 /* ??? Does VMS care if mask contains ra? The old code didn't 8090 set it, so I don't here. */ 8091 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA)); 8092 if (fmask) 8093 fprintf (file, "\t.fmask 0x%lx,0\n", fmask); 8094 if (alpha_procedure_type == PT_REGISTER) 8095 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno); 8096 } 8097 else if (!flag_inhibit_size_directive) 8098 { 8099 if (imask) 8100 { 8101 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, 8102 frame_size >= max_frame_size ? 0 : reg_offset - frame_size); 8103 8104 for (i = 0; i < 32; ++i) 8105 if (imask & (1UL << i)) 8106 reg_offset += 8; 8107 } 8108 8109 if (fmask) 8110 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, 8111 frame_size >= max_frame_size ? 0 : reg_offset - frame_size); 8112 } 8113 8114#if TARGET_ABI_OPEN_VMS 8115 /* If a user condition handler has been installed at some point, emit 8116 the procedure descriptor bits to point the Condition Handling Facility 8117 at the indirection wrapper, and state the fp offset at which the user 8118 handler may be found. */ 8119 if (cfun->machine->uses_condition_handler) 8120 { 8121 fprintf (file, "\t.handler __gcc_shell_handler\n"); 8122 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET); 8123 } 8124 8125#ifdef TARGET_VMS_CRASH_DEBUG 8126 /* Support of minimal traceback info. 
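All the block below does is record the function name for the debugger:
switch to the read-only data section, emit `fnname..na:' followed by
`.ascii "fnname\0"', and switch back to text. No code is affected.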
*/ 8127 switch_to_section (readonly_data_section); 8128 fprintf (file, "\t.align 3\n"); 8129 assemble_name (file, fnname); fputs ("..na:\n", file); 8130 fputs ("\t.ascii \"", file); 8131 assemble_name (file, fnname); 8132 fputs ("\\0\"\n", file); 8133 switch_to_section (text_section); 8134#endif 8135#endif /* TARGET_ABI_OPEN_VMS */ 8136} 8137 8138/* Emit the .prologue note at the scheduled end of the prologue. */ 8139 8140static void 8141alpha_output_function_end_prologue (FILE *file) 8142{ 8143 if (TARGET_ABI_OPEN_VMS) 8144 fputs ("\t.prologue\n", file); 8145 else if (!flag_inhibit_size_directive) 8146 fprintf (file, "\t.prologue %d\n", 8147 alpha_function_needs_gp || cfun->is_thunk); 8148} 8149 8150/* Write function epilogue. */ 8151 8152void 8153alpha_expand_epilogue (void) 8154{ 8155 /* Registers to save. */ 8156 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask; 8157 /* Stack space needed for pushing registers clobbered by us. */ 8158 HOST_WIDE_INT sa_size = cfun->machine->sa_size; 8159 /* Complete stack size needed. */ 8160 HOST_WIDE_INT frame_size = cfun->machine->frame_size; 8161 /* Offset from base reg to register save area. */ 8162 HOST_WIDE_INT reg_offset; 8163 int fp_is_frame_pointer, fp_offset; 8164 rtx sa_reg, sa_reg_exp = NULL; 8165 rtx sp_adj1, sp_adj2, mem, reg, insn; 8166 rtx eh_ofs; 8167 rtx cfa_restores = NULL_RTX; 8168 8169 if (TARGET_ABI_OPEN_VMS) 8170 { 8171 if (alpha_procedure_type == PT_STACK) 8172 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 8173 else 8174 reg_offset = 0; 8175 } 8176 else 8177 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 8178 8179 fp_is_frame_pointer 8180 = (TARGET_ABI_OPEN_VMS 8181 ? alpha_procedure_type == PT_STACK 8182 : frame_pointer_needed); 8183 fp_offset = 0; 8184 sa_reg = stack_pointer_rtx; 8185 8186 if (crtl->calls_eh_return) 8187 eh_ofs = EH_RETURN_STACKADJ_RTX; 8188 else 8189 eh_ofs = NULL_RTX; 8190 8191 if (sa_size) 8192 { 8193 /* If we have a frame pointer, restore SP from it. */ 8194 if (TARGET_ABI_OPEN_VMS 8195 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM 8196 : frame_pointer_needed) 8197 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 8198 8199 /* Cope with very large offsets to the register save area. */ 8200 if (reg_offset + sa_size > 0x8000) 8201 { 8202 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; 8203 HOST_WIDE_INT bias; 8204 8205 if (low + sa_size <= 0x8000) 8206 bias = reg_offset - low, reg_offset = low; 8207 else 8208 bias = reg_offset, reg_offset = 0; 8209 8210 sa_reg = gen_rtx_REG (DImode, 22); 8211 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); 8212 8213 emit_move_insn (sa_reg, sa_reg_exp); 8214 } 8215 8216 /* Restore registers in order, excepting a true frame pointer. 
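The loop below mirrors the save loop in the prologue: it visits $26 (RA)
first and then the remaining saved registers in ascending regno order via
ctz_hwi. E.g. for a hypothetical sa_mask with bits 9, 10 and 26 set, the
restore order is $26, $9, $10, each bit being cleared as its register is
reloaded. A register that is truly serving as the frame pointer is not
reloaded here; only its slot offset is recorded in fp_offset so it can be
restored last, behind a blockage, keeping the fp and sp restores consecutive.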
*/ 8217 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi(sa_mask)) 8218 { 8219 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) 8220 fp_offset = reg_offset; 8221 else 8222 { 8223 mem = gen_frame_mem (DImode, 8224 plus_constant (Pmode, sa_reg, 8225 reg_offset)); 8226 reg = gen_rtx_REG (DImode, i); 8227 emit_move_insn (reg, mem); 8228 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, 8229 cfa_restores); 8230 } 8231 reg_offset += 8; 8232 sa_mask &= ~(HOST_WIDE_INT_1U << i); 8233 } 8234 } 8235 8236 if (frame_size || eh_ofs) 8237 { 8238 sp_adj1 = stack_pointer_rtx; 8239 8240 if (eh_ofs) 8241 { 8242 sp_adj1 = gen_rtx_REG (DImode, 23); 8243 emit_move_insn (sp_adj1, 8244 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); 8245 } 8246 8247 /* If the stack size is large, begin computation into a temporary 8248 register so as not to interfere with a potential fp restore, 8249 which must be consecutive with an SP restore. */ 8250 if (frame_size < 32768 && !cfun->calls_alloca) 8251 sp_adj2 = GEN_INT (frame_size); 8252 else if (frame_size < 0x40007fffL) 8253 { 8254 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; 8255 8256 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); 8257 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) 8258 sp_adj1 = sa_reg; 8259 else 8260 { 8261 sp_adj1 = gen_rtx_REG (DImode, 23); 8262 emit_move_insn (sp_adj1, sp_adj2); 8263 } 8264 sp_adj2 = GEN_INT (low); 8265 } 8266 else 8267 { 8268 rtx tmp = gen_rtx_REG (DImode, 23); 8269 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false); 8270 if (!sp_adj2) 8271 { 8272 /* We can't drop new things to memory this late, afaik, 8273 so build it up by pieces. */ 8274 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size); 8275 gcc_assert (sp_adj2); 8276 } 8277 } 8278 8279 /* From now on, things must be in order. So emit blockages. */ 8280 8281 /* Restore the frame pointer. */ 8282 if (fp_is_frame_pointer) 8283 { 8284 emit_insn (gen_blockage ()); 8285 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, 8286 fp_offset)); 8287 emit_move_insn (hard_frame_pointer_rtx, mem); 8288 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, 8289 hard_frame_pointer_rtx, cfa_restores); 8290 } 8291 else if (TARGET_ABI_OPEN_VMS) 8292 { 8293 emit_insn (gen_blockage ()); 8294 emit_move_insn (hard_frame_pointer_rtx, 8295 gen_rtx_REG (DImode, vms_save_fp_regno)); 8296 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, 8297 hard_frame_pointer_rtx, cfa_restores); 8298 } 8299 8300 /* Restore the stack pointer. */ 8301 emit_insn (gen_blockage ()); 8302 if (sp_adj2 == const0_rtx) 8303 insn = emit_move_insn (stack_pointer_rtx, sp_adj1); 8304 else 8305 insn = emit_move_insn (stack_pointer_rtx, 8306 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)); 8307 REG_NOTES (insn) = cfa_restores; 8308 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); 8309 RTX_FRAME_RELATED_P (insn) = 1; 8310 } 8311 else 8312 { 8313 gcc_assert (cfa_restores == NULL); 8314 8315 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER) 8316 { 8317 emit_insn (gen_blockage ()); 8318 insn = emit_move_insn (hard_frame_pointer_rtx, 8319 gen_rtx_REG (DImode, vms_save_fp_regno)); 8320 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); 8321 RTX_FRAME_RELATED_P (insn) = 1; 8322 } 8323 } 8324} 8325 8326/* Output the rest of the textual info surrounding the epilogue. 
*/ 8327 8328void 8329alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) 8330{ 8331 rtx_insn *insn; 8332 8333 /* We output a nop after noreturn calls at the very end of the function to 8334 ensure that the return address always remains in the caller's code range, 8335 as not doing so might confuse unwinding engines. */ 8336 insn = get_last_insn (); 8337 if (!INSN_P (insn)) 8338 insn = prev_active_insn (insn); 8339 if (insn && CALL_P (insn)) 8340 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); 8341 8342#if TARGET_ABI_OPEN_VMS 8343 /* Write the linkage entries. */ 8344 alpha_write_linkage (file, fnname); 8345#endif 8346 8347 /* End the function. */ 8348 if (TARGET_ABI_OPEN_VMS 8349 || !flag_inhibit_size_directive) 8350 { 8351 fputs ("\t.end ", file); 8352 assemble_name (file, fnname); 8353 putc ('\n', file); 8354 } 8355 inside_function = FALSE; 8356} 8357 8358#if TARGET_ABI_OSF 8359/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. 8360 8361 In order to avoid the hordes of differences between generated code 8362 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating 8363 lots of code loading up large constants, generate rtl and emit it 8364 instead of going straight to text. 8365 8366 Not sure why this idea hasn't been explored before... */ 8367 8368static void 8369alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 8370 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 8371 tree function) 8372{ 8373 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); 8374 HOST_WIDE_INT hi, lo; 8375 rtx this_rtx, funexp; 8376 rtx_insn *insn; 8377 8378 /* We always require a valid GP. */ 8379 emit_insn (gen_prologue_ldgp ()); 8380 emit_note (NOTE_INSN_PROLOGUE_END); 8381 8382 /* Find the "this" pointer. If the function returns a structure, 8383 the structure return pointer is in $16. */ 8384 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 8385 this_rtx = gen_rtx_REG (Pmode, 17); 8386 else 8387 this_rtx = gen_rtx_REG (Pmode, 16); 8388 8389 /* Add DELTA. When possible we use ldah+lda. Otherwise load the 8390 entire constant for the add. */ 8391 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; 8392 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; 8393 if (hi + lo == delta) 8394 { 8395 if (hi) 8396 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); 8397 if (lo) 8398 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); 8399 } 8400 else 8401 { 8402 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta); 8403 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 8404 } 8405 8406 /* Add a delta stored in the vtable at VCALL_OFFSET. */ 8407 if (vcall_offset) 8408 { 8409 rtx tmp, tmp2; 8410 8411 tmp = gen_rtx_REG (Pmode, 0); 8412 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); 8413 8414 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; 8415 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; 8416 if (hi + lo == vcall_offset) 8417 { 8418 if (hi) 8419 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); 8420 } 8421 else 8422 { 8423 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1), 8424 vcall_offset); 8425 emit_insn (gen_adddi3 (tmp, tmp, tmp2)); 8426 lo = 0; 8427 } 8428 if (lo) 8429 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); 8430 else 8431 tmp2 = tmp; 8432 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); 8433 8434 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 8435 } 8436 8437 /* Generate a tail call to the target function. 
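The call must be a sibcall: the thunk builds no frame and saves no return
address, so the target function returns directly to the thunk's caller.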
*/ 8438 if (! TREE_USED (function)) 8439 { 8440 assemble_external (function); 8441 TREE_USED (function) = 1; 8442 } 8443 funexp = XEXP (DECL_RTL (function), 0); 8444 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 8445 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); 8446 SIBLING_CALL_P (insn) = 1; 8447 8448 /* Run just enough of rest_of_compilation to get the insns emitted. 8449 There's not really enough bulk here to make other passes such as 8450 instruction scheduling worthwhile. */ 8451 insn = get_insns (); 8452 shorten_branches (insn); 8453 assemble_start_function (thunk_fndecl, fnname); 8454 final_start_function (insn, file, 1); 8455 final (insn, file, 1); 8456 final_end_function (); 8457 assemble_end_function (thunk_fndecl, fnname); 8458 } 8459 #endif /* TARGET_ABI_OSF */ 8460 8461 /* Debugging support. */ 8462 8463 #include "gstab.h" 8464 8465 /* Name of the file containing the current function. */ 8466 8467 static const char *current_function_file = ""; 8468 8469 /* Offsets to alpha virtual arg/local debugging pointers. */ 8470 8471 long alpha_arg_offset; 8472 long alpha_auto_offset; 8473 8474 /* Emit a new filename to a stream. */ 8475 8476 void 8477 alpha_output_filename (FILE *stream, const char *name) 8478 { 8479 static int first_time = TRUE; 8480 8481 if (first_time) 8482 { 8483 first_time = FALSE; 8484 ++num_source_filenames; 8485 current_function_file = name; 8486 fprintf (stream, "\t.file\t%d ", num_source_filenames); 8487 output_quoted_string (stream, name); 8488 fprintf (stream, "\n"); 8489 } 8490 8491 else if (name != current_function_file 8492 && strcmp (name, current_function_file) != 0) 8493 { 8494 ++num_source_filenames; 8495 current_function_file = name; 8496 fprintf (stream, "\t.file\t%d ", num_source_filenames); 8497 8498 output_quoted_string (stream, name); 8499 fprintf (stream, "\n"); 8500 } 8501 } 8502 8503 /* Structure to show the current status of registers and memory. */ 8504 8505 struct shadow_summary 8506 { 8507 struct { 8508 unsigned int i : 31; /* Mask of int regs */ 8509 unsigned int fp : 31; /* Mask of fp regs */ 8510 unsigned int mem : 1; /* mem == imem | fpmem */ 8511 } used, defd; 8512 }; 8513 8514 /* Summarize the effects of expression X on the machine. Update SUM, a pointer 8515 to the summary structure. SET is nonzero if the insn is setting the 8516 object, otherwise zero. */ 8517 8518 static void 8519 summarize_insn (rtx x, struct shadow_summary *sum, int set) 8520 { 8521 const char *format_ptr; 8522 int i, j; 8523 8524 if (x == 0) 8525 return; 8526 8527 switch (GET_CODE (x)) 8528 { 8529 /* ??? Note that this case would be incorrect if the Alpha had a 8530 ZERO_EXTRACT in SET_DEST. 
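(A ZERO_EXTRACT destination both reads and writes its register, whereas the
code below treats SET_DEST purely as a definition, so the implicit use would
be lost.)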
*/ 8531 case SET: 8532 summarize_insn (SET_SRC (x), sum, 0); 8533 summarize_insn (SET_DEST (x), sum, 1); 8534 break; 8535 8536 case CLOBBER: 8537 summarize_insn (XEXP (x, 0), sum, 1); 8538 break; 8539 8540 case USE: 8541 summarize_insn (XEXP (x, 0), sum, 0); 8542 break; 8543 8544 case ASM_OPERANDS: 8545 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) 8546 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); 8547 break; 8548 8549 case PARALLEL: 8550 for (i = XVECLEN (x, 0) - 1; i >= 0; i--) 8551 summarize_insn (XVECEXP (x, 0, i), sum, 0); 8552 break; 8553 8554 case SUBREG: 8555 summarize_insn (SUBREG_REG (x), sum, 0); 8556 break; 8557 8558 case REG: 8559 { 8560 int regno = REGNO (x); 8561 unsigned long mask = ((unsigned long) 1) << (regno % 32); 8562 8563 if (regno == 31 || regno == 63) 8564 break; 8565 8566 if (set) 8567 { 8568 if (regno < 32) 8569 sum->defd.i |= mask; 8570 else 8571 sum->defd.fp |= mask; 8572 } 8573 else 8574 { 8575 if (regno < 32) 8576 sum->used.i |= mask; 8577 else 8578 sum->used.fp |= mask; 8579 } 8580 } 8581 break; 8582 8583 case MEM: 8584 if (set) 8585 sum->defd.mem = 1; 8586 else 8587 sum->used.mem = 1; 8588 8589 /* Find the regs used in memory address computation: */ 8590 summarize_insn (XEXP (x, 0), sum, 0); 8591 break; 8592 8593 case CONST_INT: case CONST_WIDE_INT: case CONST_DOUBLE: 8594 case SYMBOL_REF: case LABEL_REF: case CONST: 8595 case SCRATCH: case ASM_INPUT: 8596 break; 8597 8598 /* Handle common unary and binary ops for efficiency. */ 8599 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 8600 case MOD: case UDIV: case UMOD: case AND: case IOR: 8601 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 8602 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 8603 case NE: case EQ: case GE: case GT: case LE: 8604 case LT: case GEU: case GTU: case LEU: case LTU: 8605 summarize_insn (XEXP (x, 0), sum, 0); 8606 summarize_insn (XEXP (x, 1), sum, 0); 8607 break; 8608 8609 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 8610 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 8611 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 8612 case SQRT: case FFS: 8613 summarize_insn (XEXP (x, 0), sum, 0); 8614 break; 8615 8616 default: 8617 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 8618 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 8619 switch (format_ptr[i]) 8620 { 8621 case 'e': 8622 summarize_insn (XEXP (x, i), sum, 0); 8623 break; 8624 8625 case 'E': 8626 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 8627 summarize_insn (XVECEXP (x, i, j), sum, 0); 8628 break; 8629 8630 case 'i': 8631 break; 8632 8633 default: 8634 gcc_unreachable (); 8635 } 8636 } 8637} 8638 8639/* Ensure a sufficient number of `trapb' insns are in the code when 8640 the user requests code with a trap precision of functions or 8641 instructions. 8642 8643 In naive mode, when the user requests a trap-precision of 8644 "instruction", a trapb is needed after every instruction that may 8645 generate a trap. This ensures that the code is resumption safe but 8646 it is also slow. 8647 8648 When optimizations are turned on, we delay issuing a trapb as long 8649 as possible. In this context, a trap shadow is the sequence of 8650 instructions that starts with a (potentially) trap generating 8651 instruction and extends to the next trapb or call_pal instruction 8652 (but GCC never generates call_pal by itself). 
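Schematically: a potentially trapping insn such as addt opens a shadow, and
every insn issued before the closing trapb lies inside it.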
We can delay (and 8653 therefore sometimes omit) a trapb subject to the following 8654 conditions: 8655 8656 (a) On entry to the trap shadow, if any Alpha register or memory 8657 location contains a value that is used as an operand value by some 8658 instruction in the trap shadow (live on entry), then no instruction 8659 in the trap shadow may modify the register or memory location. 8660 8661 (b) Within the trap shadow, the computation of the base register 8662 for a memory load or store instruction may not involve using the 8663 result of an instruction that might generate an UNPREDICTABLE 8664 result. 8665 8666 (c) Within the trap shadow, no register may be used more than once 8667 as a destination register. (This is to make life easier for the 8668 trap-handler.) 8669 8670 (d) The trap shadow may not include any branch instructions. */ 8671 8672static void 8673alpha_handle_trap_shadows (void) 8674{ 8675 struct shadow_summary shadow; 8676 int trap_pending, exception_nesting; 8677 rtx_insn *i, *n; 8678 8679 trap_pending = 0; 8680 exception_nesting = 0; 8681 shadow.used.i = 0; 8682 shadow.used.fp = 0; 8683 shadow.used.mem = 0; 8684 shadow.defd = shadow.used; 8685 8686 for (i = get_insns (); i ; i = NEXT_INSN (i)) 8687 { 8688 if (NOTE_P (i)) 8689 { 8690 switch (NOTE_KIND (i)) 8691 { 8692 case NOTE_INSN_EH_REGION_BEG: 8693 exception_nesting++; 8694 if (trap_pending) 8695 goto close_shadow; 8696 break; 8697 8698 case NOTE_INSN_EH_REGION_END: 8699 exception_nesting--; 8700 if (trap_pending) 8701 goto close_shadow; 8702 break; 8703 8704 case NOTE_INSN_EPILOGUE_BEG: 8705 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC) 8706 goto close_shadow; 8707 break; 8708 } 8709 } 8710 else if (trap_pending) 8711 { 8712 if (alpha_tp == ALPHA_TP_FUNC) 8713 { 8714 if (JUMP_P (i) 8715 && GET_CODE (PATTERN (i)) == RETURN) 8716 goto close_shadow; 8717 } 8718 else if (alpha_tp == ALPHA_TP_INSN) 8719 { 8720 if (optimize > 0) 8721 { 8722 struct shadow_summary sum; 8723 8724 sum.used.i = 0; 8725 sum.used.fp = 0; 8726 sum.used.mem = 0; 8727 sum.defd = sum.used; 8728 8729 switch (GET_CODE (i)) 8730 { 8731 case INSN: 8732 /* Annoyingly, get_attr_trap will die on these. */ 8733 if (GET_CODE (PATTERN (i)) == USE 8734 || GET_CODE (PATTERN (i)) == CLOBBER) 8735 break; 8736 8737 summarize_insn (PATTERN (i), &sum, 0); 8738 8739 if ((sum.defd.i & shadow.defd.i) 8740 || (sum.defd.fp & shadow.defd.fp)) 8741 { 8742 /* (c) would be violated */ 8743 goto close_shadow; 8744 } 8745 8746 /* Combine shadow with summary of current insn: */ 8747 shadow.used.i |= sum.used.i; 8748 shadow.used.fp |= sum.used.fp; 8749 shadow.used.mem |= sum.used.mem; 8750 shadow.defd.i |= sum.defd.i; 8751 shadow.defd.fp |= sum.defd.fp; 8752 shadow.defd.mem |= sum.defd.mem; 8753 8754 if ((sum.defd.i & shadow.used.i) 8755 || (sum.defd.fp & shadow.used.fp) 8756 || (sum.defd.mem & shadow.used.mem)) 8757 { 8758 /* (a) would be violated (also takes care of (b)) */ 8759 gcc_assert (get_attr_trap (i) != TRAP_YES 8760 || (!(sum.defd.i & sum.used.i) 8761 && !(sum.defd.fp & sum.used.fp))); 8762 8763 goto close_shadow; 8764 } 8765 break; 8766 8767 case BARRIER: 8768 /* __builtin_unreachable can expand to no code at all, 8769 leaving (barrier) RTXes in the instruction stream. 
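There is then nothing to protect against, so reset the shadow state without
emitting a trapb; hence the jump to close_shadow_notrapb rather than
close_shadow.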
*/ 8770 goto close_shadow_notrapb; 8771 8772 case JUMP_INSN: 8773 case CALL_INSN: 8774 case CODE_LABEL: 8775 goto close_shadow; 8776 8777 case DEBUG_INSN: 8778 break; 8779 8780 default: 8781 gcc_unreachable (); 8782 } 8783 } 8784 else 8785 { 8786 close_shadow: 8787 n = emit_insn_before (gen_trapb (), i); 8788 PUT_MODE (n, TImode); 8789 PUT_MODE (i, TImode); 8790 close_shadow_notrapb: 8791 trap_pending = 0; 8792 shadow.used.i = 0; 8793 shadow.used.fp = 0; 8794 shadow.used.mem = 0; 8795 shadow.defd = shadow.used; 8796 } 8797 } 8798 } 8799 8800 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC) 8801 && NONJUMP_INSN_P (i) 8802 && GET_CODE (PATTERN (i)) != USE 8803 && GET_CODE (PATTERN (i)) != CLOBBER 8804 && get_attr_trap (i) == TRAP_YES) 8805 { 8806 if (optimize && !trap_pending) 8807 summarize_insn (PATTERN (i), &shadow, 0); 8808 trap_pending = 1; 8809 } 8810 } 8811} 8812 8813/* Alpha can only issue instruction groups simultaneously if they are 8814 suitably aligned. This is very processor-specific. */ 8815/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe 8816 that are marked "fake". These instructions do not exist on that target, 8817 but it is possible to see these insns with deranged combinations of 8818 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting, 8819 choose a result at random. */ 8820 8821enum alphaev4_pipe { 8822 EV4_STOP = 0, 8823 EV4_IB0 = 1, 8824 EV4_IB1 = 2, 8825 EV4_IBX = 4 8826}; 8827 8828enum alphaev5_pipe { 8829 EV5_STOP = 0, 8830 EV5_NONE = 1, 8831 EV5_E01 = 2, 8832 EV5_E0 = 4, 8833 EV5_E1 = 8, 8834 EV5_FAM = 16, 8835 EV5_FA = 32, 8836 EV5_FM = 64 8837}; 8838 8839static enum alphaev4_pipe 8840alphaev4_insn_pipe (rtx_insn *insn) 8841{ 8842 if (recog_memoized (insn) < 0) 8843 return EV4_STOP; 8844 if (get_attr_length (insn) != 4) 8845 return EV4_STOP; 8846 8847 switch (get_attr_type (insn)) 8848 { 8849 case TYPE_ILD: 8850 case TYPE_LDSYM: 8851 case TYPE_FLD: 8852 case TYPE_LD_L: 8853 return EV4_IBX; 8854 8855 case TYPE_IADD: 8856 case TYPE_ILOG: 8857 case TYPE_ICMOV: 8858 case TYPE_ICMP: 8859 case TYPE_FST: 8860 case TYPE_SHIFT: 8861 case TYPE_IMUL: 8862 case TYPE_FBR: 8863 case TYPE_MVI: /* fake */ 8864 return EV4_IB0; 8865 8866 case TYPE_IST: 8867 case TYPE_MISC: 8868 case TYPE_IBR: 8869 case TYPE_JSR: 8870 case TYPE_CALLPAL: 8871 case TYPE_FCPYS: 8872 case TYPE_FCMOV: 8873 case TYPE_FADD: 8874 case TYPE_FDIV: 8875 case TYPE_FMUL: 8876 case TYPE_ST_C: 8877 case TYPE_MB: 8878 case TYPE_FSQRT: /* fake */ 8879 case TYPE_FTOI: /* fake */ 8880 case TYPE_ITOF: /* fake */ 8881 return EV4_IB1; 8882 8883 default: 8884 gcc_unreachable (); 8885 } 8886} 8887 8888static enum alphaev5_pipe 8889alphaev5_insn_pipe (rtx_insn *insn) 8890{ 8891 if (recog_memoized (insn) < 0) 8892 return EV5_STOP; 8893 if (get_attr_length (insn) != 4) 8894 return EV5_STOP; 8895 8896 switch (get_attr_type (insn)) 8897 { 8898 case TYPE_ILD: 8899 case TYPE_FLD: 8900 case TYPE_LDSYM: 8901 case TYPE_IADD: 8902 case TYPE_ILOG: 8903 case TYPE_ICMOV: 8904 case TYPE_ICMP: 8905 return EV5_E01; 8906 8907 case TYPE_IST: 8908 case TYPE_FST: 8909 case TYPE_SHIFT: 8910 case TYPE_IMUL: 8911 case TYPE_MISC: 8912 case TYPE_MVI: 8913 case TYPE_LD_L: 8914 case TYPE_ST_C: 8915 case TYPE_MB: 8916 case TYPE_FTOI: /* fake */ 8917 case TYPE_ITOF: /* fake */ 8918 return EV5_E0; 8919 8920 case TYPE_IBR: 8921 case TYPE_JSR: 8922 case TYPE_CALLPAL: 8923 return EV5_E1; 8924 8925 case TYPE_FCPYS: 8926 return EV5_FAM; 8927 8928 case TYPE_FBR: 8929 case TYPE_FCMOV: 8930 case TYPE_FADD: 
8931 case TYPE_FDIV: 8932 case TYPE_FSQRT: /* fake */ 8933 return EV5_FA; 8934 8935 case TYPE_FMUL: 8936 return EV5_FM; 8937 8938 default: 8939 gcc_unreachable (); 8940 } 8941} 8942 8943/* IN_USE is a mask of the slots currently filled within the insn group. 8944 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then 8945 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1. 8946 8947 LEN is, of course, the length of the group in bytes. */ 8948 8949static rtx_insn * 8950alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen) 8951{ 8952 int len, in_use; 8953 8954 len = in_use = 0; 8955 8956 if (! INSN_P (insn) 8957 || GET_CODE (PATTERN (insn)) == CLOBBER 8958 || GET_CODE (PATTERN (insn)) == USE) 8959 goto next_and_done; 8960 8961 while (1) 8962 { 8963 enum alphaev4_pipe pipe; 8964 8965 pipe = alphaev4_insn_pipe (insn); 8966 switch (pipe) 8967 { 8968 case EV4_STOP: 8969 /* Force complex instructions to start new groups. */ 8970 if (in_use) 8971 goto done; 8972 8973 /* If this is a completely unrecognized insn, it's an asm. 8974 We don't know how long it is, so record length as -1 to 8975 signal a needed realignment. */ 8976 if (recog_memoized (insn) < 0) 8977 len = -1; 8978 else 8979 len = get_attr_length (insn); 8980 goto next_and_done; 8981 8982 case EV4_IBX: 8983 if (in_use & EV4_IB0) 8984 { 8985 if (in_use & EV4_IB1) 8986 goto done; 8987 in_use |= EV4_IB1; 8988 } 8989 else 8990 in_use |= EV4_IB0 | EV4_IBX; 8991 break; 8992 8993 case EV4_IB0: 8994 if (in_use & EV4_IB0) 8995 { 8996 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1)) 8997 goto done; 8998 in_use |= EV4_IB1; 8999 } 9000 in_use |= EV4_IB0; 9001 break; 9002 9003 case EV4_IB1: 9004 if (in_use & EV4_IB1) 9005 goto done; 9006 in_use |= EV4_IB1; 9007 break; 9008 9009 default: 9010 gcc_unreachable (); 9011 } 9012 len += 4; 9013 9014 /* Haifa doesn't do well scheduling branches. */ 9015 if (JUMP_P (insn)) 9016 goto next_and_done; 9017 9018 next: 9019 insn = next_nonnote_insn (insn); 9020 9021 if (!insn || ! INSN_P (insn)) 9022 goto done; 9023 9024 /* Let Haifa tell us where it thinks insn group boundaries are. */ 9025 if (GET_MODE (insn) == TImode) 9026 goto done; 9027 9028 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) 9029 goto next; 9030 } 9031 9032 next_and_done: 9033 insn = next_nonnote_insn (insn); 9034 9035 done: 9036 *plen = len; 9037 *pin_use = in_use; 9038 return insn; 9039} 9040 9041/* IN_USE is a mask of the slots currently filled within the insn group. 9042 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then 9043 the insn in EV5_E0 can be swapped by the hardware into EV5_E1. 9044 9045 LEN is, of course, the length of the group in bytes. */ 9046 9047static rtx_insn * 9048alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen) 9049{ 9050 int len, in_use; 9051 9052 len = in_use = 0; 9053 9054 if (! INSN_P (insn) 9055 || GET_CODE (PATTERN (insn)) == CLOBBER 9056 || GET_CODE (PATTERN (insn)) == USE) 9057 goto next_and_done; 9058 9059 while (1) 9060 { 9061 enum alphaev5_pipe pipe; 9062 9063 pipe = alphaev5_insn_pipe (insn); 9064 switch (pipe) 9065 { 9066 case EV5_STOP: 9067 /* Force complex instructions to start new groups. */ 9068 if (in_use) 9069 goto done; 9070 9071 /* If this is a completely unrecognized insn, it's an asm. 9072 We don't know how long it is, so record length as -1 to 9073 signal a needed realignment. */ 9074 if (recog_memoized (insn) < 0) 9075 len = -1; 9076 else 9077 len = get_attr_length (insn); 9078 goto next_and_done; 9079 9080 /* ??? 
Most of the places below, we would like to assert never 9081 happen, as it would indicate an error either in Haifa, or 9082 in the scheduling description. Unfortunately, Haifa never 9083 schedules the last instruction of the BB, so we don't have 9084 an accurate TI bit to go off. */ 9085 case EV5_E01: 9086 if (in_use & EV5_E0) 9087 { 9088 if (in_use & EV5_E1) 9089 goto done; 9090 in_use |= EV5_E1; 9091 } 9092 else 9093 in_use |= EV5_E0 | EV5_E01; 9094 break; 9095 9096 case EV5_E0: 9097 if (in_use & EV5_E0) 9098 { 9099 if (!(in_use & EV5_E01) || (in_use & EV5_E1)) 9100 goto done; 9101 in_use |= EV5_E1; 9102 } 9103 in_use |= EV5_E0; 9104 break; 9105 9106 case EV5_E1: 9107 if (in_use & EV5_E1) 9108 goto done; 9109 in_use |= EV5_E1; 9110 break; 9111 9112 case EV5_FAM: 9113 if (in_use & EV5_FA) 9114 { 9115 if (in_use & EV5_FM) 9116 goto done; 9117 in_use |= EV5_FM; 9118 } 9119 else 9120 in_use |= EV5_FA | EV5_FAM; 9121 break; 9122 9123 case EV5_FA: 9124 if (in_use & EV5_FA) 9125 goto done; 9126 in_use |= EV5_FA; 9127 break; 9128 9129 case EV5_FM: 9130 if (in_use & EV5_FM) 9131 goto done; 9132 in_use |= EV5_FM; 9133 break; 9134 9135 case EV5_NONE: 9136 break; 9137 9138 default: 9139 gcc_unreachable (); 9140 } 9141 len += 4; 9142 9143 /* Haifa doesn't do well scheduling branches. */ 9144 /* ??? If this is predicted not-taken, slotting continues, except 9145 that no more IBR, FBR, or JSR insns may be slotted. */ 9146 if (JUMP_P (insn)) 9147 goto next_and_done; 9148 9149 next: 9150 insn = next_nonnote_insn (insn); 9151 9152 if (!insn || ! INSN_P (insn)) 9153 goto done; 9154 9155 /* Let Haifa tell us where it thinks insn group boundaries are. */ 9156 if (GET_MODE (insn) == TImode) 9157 goto done; 9158 9159 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) 9160 goto next; 9161 } 9162 9163 next_and_done: 9164 insn = next_nonnote_insn (insn); 9165 9166 done: 9167 *plen = len; 9168 *pin_use = in_use; 9169 return insn; 9170} 9171 9172static rtx 9173alphaev4_next_nop (int *pin_use) 9174{ 9175 int in_use = *pin_use; 9176 rtx nop; 9177 9178 if (!(in_use & EV4_IB0)) 9179 { 9180 in_use |= EV4_IB0; 9181 nop = gen_nop (); 9182 } 9183 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX) 9184 { 9185 in_use |= EV4_IB1; 9186 nop = gen_nop (); 9187 } 9188 else if (TARGET_FP && !(in_use & EV4_IB1)) 9189 { 9190 in_use |= EV4_IB1; 9191 nop = gen_fnop (); 9192 } 9193 else 9194 nop = gen_unop (); 9195 9196 *pin_use = in_use; 9197 return nop; 9198} 9199 9200static rtx 9201alphaev5_next_nop (int *pin_use) 9202{ 9203 int in_use = *pin_use; 9204 rtx nop; 9205 9206 if (!(in_use & EV5_E1)) 9207 { 9208 in_use |= EV5_E1; 9209 nop = gen_nop (); 9210 } 9211 else if (TARGET_FP && !(in_use & EV5_FA)) 9212 { 9213 in_use |= EV5_FA; 9214 nop = gen_fnop (); 9215 } 9216 else if (TARGET_FP && !(in_use & EV5_FM)) 9217 { 9218 in_use |= EV5_FM; 9219 nop = gen_fnop (); 9220 } 9221 else 9222 nop = gen_unop (); 9223 9224 *pin_use = in_use; 9225 return nop; 9226} 9227 9228/* The instruction group alignment main loop. */ 9229 9230static void 9231alpha_align_insns_1 (unsigned int max_align, 9232 rtx_insn *(*next_group) (rtx_insn *, int *, int *), 9233 rtx (*next_nop) (int *)) 9234{ 9235 /* ALIGN is the known alignment for the insn group. */ 9236 unsigned int align; 9237 /* OFS is the offset of the current insn in the insn group. */ 9238 int ofs; 9239 int prev_in_use, in_use, len, ldgp; 9240 rtx_insn *i, *next; 9241 9242 /* Let shorten branches care for assigning alignments to code labels. 
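shorten_branches also computes insn lengths, which the loop below depends on.
In outline: it tracks the byte offset of each insn group within the currently
known alignment and, when a group would straddle an issue boundary, either
emits a realignment directive or pads with nops picked (via next_nop) so that
the previous group can issue them for free.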
*/ 9243 shorten_branches (get_insns ()); 9244 9245 unsigned int option_alignment = align_functions.levels[0].get_value (); 9246 if (option_alignment < 4) 9247 align = 4; 9248 else if ((unsigned int) option_alignment < max_align) 9249 align = option_alignment; 9250 else 9251 align = max_align; 9252 9253 ofs = prev_in_use = 0; 9254 i = get_insns (); 9255 if (NOTE_P (i)) 9256 i = next_nonnote_insn (i); 9257 9258 ldgp = alpha_function_needs_gp ? 8 : 0; 9259 9260 while (i) 9261 { 9262 next = (*next_group) (i, &in_use, &len); 9263 9264 /* When we see a label, resync alignment etc. */ 9265 if (LABEL_P (i)) 9266 { 9267 unsigned int new_align 9268 = label_to_alignment (i).levels[0].get_value (); 9269 9270 if (new_align >= align) 9271 { 9272 align = new_align < max_align ? new_align : max_align; 9273 ofs = 0; 9274 } 9275 9276 else if (ofs & (new_align-1)) 9277 ofs = (ofs | (new_align-1)) + 1; 9278 gcc_assert (!len); 9279 } 9280 9281 /* Handle complex instructions special. */ 9282 else if (in_use == 0) 9283 { 9284 /* Asms will have length < 0. This is a signal that we have 9285 lost alignment knowledge. Assume, however, that the asm 9286 will not mis-align instructions. */ 9287 if (len < 0) 9288 { 9289 ofs = 0; 9290 align = 4; 9291 len = 0; 9292 } 9293 } 9294 9295 /* If the known alignment is smaller than the recognized insn group, 9296 realign the output. */ 9297 else if ((int) align < len) 9298 { 9299 unsigned int new_log_align = len > 8 ? 4 : 3; 9300 rtx_insn *prev, *where; 9301 9302 where = prev = prev_nonnote_insn (i); 9303 if (!where || !LABEL_P (where)) 9304 where = i; 9305 9306 /* Can't realign between a call and its gp reload. */ 9307 if (! (TARGET_EXPLICIT_RELOCS 9308 && prev && CALL_P (prev))) 9309 { 9310 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); 9311 align = 1 << new_log_align; 9312 ofs = 0; 9313 } 9314 } 9315 9316 /* We may not insert padding inside the initial ldgp sequence. */ 9317 else if (ldgp > 0) 9318 ldgp -= len; 9319 9320 /* If the group won't fit in the same INT16 as the previous, 9321 we need to add padding to keep the group together. Rather 9322 than simply leaving the insn filling to the assembler, we 9323 can make use of the knowledge of what sorts of instructions 9324 were issued in the previous group to make sure that all of 9325 the added nops are really free. */ 9326 else if (ofs + len > (int) align) 9327 { 9328 int nop_count = (align - ofs) / 4; 9329 rtx_insn *where; 9330 9331 /* Insert nops before labels, branches, and calls to truly merge 9332 the execution of the nops with the previous instruction group. */ 9333 where = prev_nonnote_insn (i); 9334 if (where) 9335 { 9336 if (LABEL_P (where)) 9337 { 9338 rtx_insn *where2 = prev_nonnote_insn (where); 9339 if (where2 && JUMP_P (where2)) 9340 where = where2; 9341 } 9342 else if (NONJUMP_INSN_P (where)) 9343 where = i; 9344 } 9345 else 9346 where = i; 9347 9348 do 9349 emit_insn_before ((*next_nop)(&prev_in_use), where); 9350 while (--nop_count); 9351 ofs = 0; 9352 } 9353 9354 ofs = (ofs + len) & (align - 1); 9355 prev_in_use = in_use; 9356 i = next; 9357 } 9358} 9359 9360static void 9361alpha_align_insns (void) 9362{ 9363 if (alpha_tune == PROCESSOR_EV4) 9364 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop); 9365 else if (alpha_tune == PROCESSOR_EV5) 9366 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop); 9367 else 9368 gcc_unreachable (); 9369} 9370 9371/* Insert an unop between sibcall or noreturn function call and GP load. 
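This is the insertion helper for the linker-relaxation workaround described
in alpha_reorg below: the unop separates such a call from a following ldgp
sequence so that linker relaxation does not wrongly delete the GP load.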
*/ 9372 9373 static void 9374 alpha_pad_function_end (void) 9375 { 9376 rtx_insn *insn, *next; 9377 9378 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9379 { 9380 if (!CALL_P (insn) 9381 || !(SIBLING_CALL_P (insn) 9382 || find_reg_note (insn, REG_NORETURN, NULL_RTX))) 9383 continue; 9384 9385 next = next_active_insn (insn); 9386 if (next) 9387 { 9388 rtx pat = PATTERN (next); 9389 9390 if (GET_CODE (pat) == SET 9391 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE 9392 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) 9393 emit_insn_after (gen_unop (), insn); 9394 } 9395 } 9396 } 9397 9398 /* Machine dependent reorg pass. */ 9399 9400 static void 9401 alpha_reorg (void) 9402 { 9403 /* Workaround for a linker error that triggers when an exception 9404 handler immediately follows a sibcall or a noreturn function. 9405 9406 In the sibcall case: 9407 9408 The instruction stream from an object file: 9409 9410 1d8: 00 00 fb 6b jmp (t12) 9411 1dc: 00 00 ba 27 ldah gp,0(ra) 9412 1e0: 00 00 bd 23 lda gp,0(gp) 9413 1e4: 00 00 7d a7 ldq t12,0(gp) 9414 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec> 9415 9416 was converted in the final link pass to: 9417 9418 12003aa88: 67 fa ff c3 br 120039428 <...> 9419 12003aa8c: 00 00 fe 2f unop 9420 12003aa90: 00 00 fe 2f unop 9421 12003aa94: 48 83 7d a7 ldq t12,-31928(gp) 9422 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec> 9423 9424 And in the noreturn case: 9425 9426 The instruction stream from an object file: 9427 9428 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58> 9429 58: 00 00 ba 27 ldah gp,0(ra) 9430 5c: 00 00 bd 23 lda gp,0(gp) 9431 60: 00 00 7d a7 ldq t12,0(gp) 9432 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68> 9433 9434 was converted in the final link pass to: 9435 9436 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> 9437 fdb28: 00 00 fe 2f unop 9438 fdb2c: 00 00 fe 2f unop 9439 fdb30: 30 82 7d a7 ldq t12,-32208(gp) 9440 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68> 9441 9442 GP load instructions were wrongly cleared by the linker relaxation 9443 pass. This workaround prevents removal of GP loads by inserting 9444 an unop instruction between a sibcall or noreturn function call and 9445 the exception handler prologue. */ 9446 9447 if (current_function_has_exception_handlers ()) 9448 alpha_pad_function_end (); 9449 9450 /* A CALL_PAL insn that implements a trap updates the program counter to 9451 point just past the insn. If the trap is the last insn in the function, 9452 emit a NOP to guarantee that the PC remains inside the function's 9453 boundaries. This workaround is needed to get reliable backtraces. 
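Schematically: were the trap CALL_PAL the final insn, the updated PC would
sit at the function's end address, which a backtrace could attribute to the
following function; the unop emitted below keeps the PC inside.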
*/ 9454 9455 rtx_insn *insn = prev_active_insn (get_last_insn ()); 9456 9457 if (insn && NONJUMP_INSN_P (insn)) 9458 { 9459 rtx pat = PATTERN (insn); 9460 if (GET_CODE (pat) == PARALLEL) 9461 { 9462 rtx vec = XVECEXP (pat, 0, 0); 9463 if (GET_CODE (vec) == TRAP_IF 9464 && XEXP (vec, 0) == const1_rtx) 9465 emit_insn_after (gen_unop (), insn); 9466 } 9467 } 9468} 9469 9470static void 9471alpha_file_start (void) 9472{ 9473 default_file_start (); 9474 9475 fputs ("\t.set noreorder\n", asm_out_file); 9476 fputs ("\t.set volatile\n", asm_out_file); 9477 if (TARGET_ABI_OSF) 9478 fputs ("\t.set noat\n", asm_out_file); 9479 if (TARGET_EXPLICIT_RELOCS) 9480 fputs ("\t.set nomacro\n", asm_out_file); 9481 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX) 9482 { 9483 const char *arch; 9484 9485 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX) 9486 arch = "ev6"; 9487 else if (TARGET_MAX) 9488 arch = "pca56"; 9489 else if (TARGET_BWX) 9490 arch = "ev56"; 9491 else if (alpha_cpu == PROCESSOR_EV5) 9492 arch = "ev5"; 9493 else 9494 arch = "ev4"; 9495 9496 fprintf (asm_out_file, "\t.arch %s\n", arch); 9497 } 9498} 9499 9500/* Since we don't have a .dynbss section, we should not allow global 9501 relocations in the .rodata section. */ 9502 9503static int 9504alpha_elf_reloc_rw_mask (void) 9505{ 9506 return flag_pic ? 3 : 2; 9507} 9508 9509/* Return a section for X. The only special thing we do here is to 9510 honor small data. */ 9511 9512static section * 9513alpha_elf_select_rtx_section (machine_mode mode, rtx x, 9514 unsigned HOST_WIDE_INT align) 9515{ 9516 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) 9517 /* ??? Consider using mergeable sdata sections. */ 9518 return sdata_section; 9519 else 9520 return default_elf_select_rtx_section (mode, x, align); 9521} 9522 9523static unsigned int 9524alpha_elf_section_type_flags (tree decl, const char *name, int reloc) 9525{ 9526 unsigned int flags = 0; 9527 9528 if (strcmp (name, ".sdata") == 0 9529 || startswith (name, ".sdata.") 9530 || startswith (name, ".gnu.linkonce.s.") 9531 || strcmp (name, ".sbss") == 0 9532 || startswith (name, ".sbss.") 9533 || startswith (name, ".gnu.linkonce.sb.")) 9534 flags = SECTION_SMALL; 9535 9536 flags |= default_section_type_flags (decl, name, reloc); 9537 return flags; 9538} 9539 9540/* Structure to collect function names for final output in link section. */ 9541/* Note that items marked with GTY can't be ifdef'ed out. */ 9542 9543enum reloc_kind 9544{ 9545 KIND_LINKAGE, 9546 KIND_CODEADDR 9547}; 9548 9549struct GTY(()) alpha_links 9550{ 9551 rtx func; 9552 rtx linkage; 9553 enum reloc_kind rkind; 9554}; 9555 9556#if TARGET_ABI_OPEN_VMS 9557 9558/* Return the VMS argument type corresponding to MODE. */ 9559 9560enum avms_arg_type 9561alpha_arg_type (machine_mode mode) 9562{ 9563 switch (mode) 9564 { 9565 case E_SFmode: 9566 return TARGET_FLOAT_VAX ? FF : FS; 9567 case E_DFmode: 9568 return TARGET_FLOAT_VAX ? FD : FT; 9569 default: 9570 return I64; 9571 } 9572} 9573 9574/* Return an rtx for an integer representing the VMS Argument Information 9575 register value. */ 9576 9577rtx 9578alpha_arg_info_reg_val (CUMULATIVE_ARGS cum) 9579{ 9580 unsigned HOST_WIDE_INT regval = cum.num_args; 9581 int i; 9582 9583 for (i = 0; i < 6; i++) 9584 regval |= ((int) cum.atypes[i]) << (i * 3 + 8); 9585 9586 return GEN_INT (regval); 9587} 9588 9589 9590/* Return a SYMBOL_REF representing the reference to the .linkage entry 9591 of function FUNC built for calls made from CFUNDECL. 
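(Each ..lk entry, as laid out by alpha_write_one_linkage below, is a pair of
quads: the ..en entry address at offset 0 and the procedure value at
offset 8.)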
LFLAG is 1 if 9592 this is the reference to the linkage pointer value, 0 if this is the 9593 reference to the function entry value. RFLAG is 1 if this a reduced 9594 reference (code address only), 0 if this is a full reference. */ 9595 9596rtx 9597alpha_use_linkage (rtx func, bool lflag, bool rflag) 9598{ 9599 struct alpha_links *al = NULL; 9600 const char *name = XSTR (func, 0); 9601 9602 if (cfun->machine->links) 9603 { 9604 /* Is this name already defined? */ 9605 alpha_links **slot = cfun->machine->links->get (name); 9606 if (slot) 9607 al = *slot; 9608 } 9609 else 9610 cfun->machine->links 9611 = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64); 9612 9613 if (al == NULL) 9614 { 9615 size_t buf_len; 9616 char *linksym; 9617 tree id; 9618 9619 if (name[0] == '*') 9620 name++; 9621 9622 /* Follow transparent alias, as this is used for CRTL translations. */ 9623 id = maybe_get_identifier (name); 9624 if (id) 9625 { 9626 while (IDENTIFIER_TRANSPARENT_ALIAS (id)) 9627 id = TREE_CHAIN (id); 9628 name = IDENTIFIER_POINTER (id); 9629 } 9630 9631 buf_len = strlen (name) + 8 + 9; 9632 linksym = (char *) alloca (buf_len); 9633 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name); 9634 9635 al = ggc_alloc<alpha_links> (); 9636 al->func = func; 9637 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym)); 9638 9639 cfun->machine->links->put (ggc_strdup (name), al); 9640 } 9641 9642 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE; 9643 9644 if (lflag) 9645 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8)); 9646 else 9647 return al->linkage; 9648} 9649 9650static int 9651alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream) 9652{ 9653 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0)); 9654 if (link->rkind == KIND_CODEADDR) 9655 { 9656 /* External and used, request code address. */ 9657 fprintf (stream, "\t.code_address "); 9658 } 9659 else 9660 { 9661 if (!SYMBOL_REF_EXTERNAL_P (link->func) 9662 && SYMBOL_REF_LOCAL_P (link->func)) 9663 { 9664 /* Locally defined, build linkage pair. */ 9665 fprintf (stream, "\t.quad %s..en\n", name); 9666 fprintf (stream, "\t.quad "); 9667 } 9668 else 9669 { 9670 /* External, request linkage pair. */ 9671 fprintf (stream, "\t.linkage "); 9672 } 9673 } 9674 assemble_name (stream, name); 9675 fputs ("\n", stream); 9676 9677 return 0; 9678} 9679 9680static void 9681alpha_write_linkage (FILE *stream, const char *funname) 9682{ 9683 fprintf (stream, "\t.link\n"); 9684 fprintf (stream, "\t.align 3\n"); 9685 in_section = NULL; 9686 9687#ifdef TARGET_VMS_CRASH_DEBUG 9688 fputs ("\t.name ", stream); 9689 assemble_name (stream, funname); 9690 fputs ("..na\n", stream); 9691#endif 9692 9693 ASM_OUTPUT_LABEL (stream, funname); 9694 fprintf (stream, "\t.pdesc "); 9695 assemble_name (stream, funname); 9696 fprintf (stream, "..en,%s\n", 9697 alpha_procedure_type == PT_STACK ? "stack" 9698 : alpha_procedure_type == PT_REGISTER ? "reg" : "null"); 9699 9700 if (cfun->machine->links) 9701 { 9702 hash_map<nofree_string_hash, alpha_links *>::iterator iter 9703 = cfun->machine->links->begin (); 9704 for (; iter != cfun->machine->links->end (); ++iter) 9705 alpha_write_one_linkage ((*iter).first, (*iter).second, stream); 9706 } 9707} 9708 9709/* Switch to an arbitrary section NAME with attributes as specified 9710 by FLAGS. ALIGN specifies any known alignment requirements for 9711 the section; 0 if the default should be used. 
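For example (the section name is invented for illustration), a debug section
named .debug_info comes out below as `.section .debug_info,NOWRT', while a
non-debug section gets just the bare .section line.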
*/ 9712 9713static void 9714vms_asm_named_section (const char *name, unsigned int flags, 9715 tree decl ATTRIBUTE_UNUSED) 9716{ 9717 fputc ('\n', asm_out_file); 9718 fprintf (asm_out_file, ".section\t%s", name); 9719 9720 if (flags & SECTION_DEBUG) 9721 fprintf (asm_out_file, ",NOWRT"); 9722 9723 fputc ('\n', asm_out_file); 9724} 9725 9726/* Record an element in the table of global constructors. SYMBOL is 9727 a SYMBOL_REF of the function to be called; PRIORITY is a number 9728 between 0 and MAX_INIT_PRIORITY. 9729 9730 Differs from default_ctors_section_asm_out_constructor in that the 9731 width of the .ctors entry is always 64 bits, rather than the 32 bits 9732 used by a normal pointer. */ 9733 9734static void 9735vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 9736{ 9737 switch_to_section (ctors_section); 9738 assemble_align (BITS_PER_WORD); 9739 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); 9740} 9741 9742static void 9743vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 9744{ 9745 switch_to_section (dtors_section); 9746 assemble_align (BITS_PER_WORD); 9747 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); 9748} 9749#else 9750rtx 9751alpha_use_linkage (rtx func ATTRIBUTE_UNUSED, 9752 bool lflag ATTRIBUTE_UNUSED, 9753 bool rflag ATTRIBUTE_UNUSED) 9754{ 9755 return NULL_RTX; 9756} 9757 9758#endif /* TARGET_ABI_OPEN_VMS */ 9759 9760static void 9761alpha_init_libfuncs (void) 9762{ 9763 if (TARGET_ABI_OPEN_VMS) 9764 { 9765 /* Use the VMS runtime library functions for division and 9766 remainder. */ 9767 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); 9768 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); 9769 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); 9770 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); 9771 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); 9772 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); 9773 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); 9774 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); 9775#ifdef MEM_LIBFUNCS_INIT 9776 MEM_LIBFUNCS_INIT; 9777#endif 9778 } 9779} 9780 9781/* On the Alpha, we use this to disable the floating-point registers 9782 when they don't exist. */ 9783 9784static void 9785alpha_conditional_register_usage (void) 9786{ 9787 int i; 9788 if (! TARGET_FPREGS) 9789 for (i = 32; i < 63; i++) 9790 fixed_regs[i] = call_used_regs[i] = 1; 9791} 9792 9793/* Canonicalize a comparison from one we don't have to one we do have. */ 9794 9795static void 9796alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, 9797 bool op0_preserve_value) 9798{ 9799 if (!op0_preserve_value 9800 && (*code == GE || *code == GT || *code == GEU || *code == GTU) 9801 && (REG_P (*op1) || *op1 == const0_rtx)) 9802 { 9803 std::swap (*op0, *op1); 9804 *code = (int)swap_condition ((enum rtx_code)*code); 9805 } 9806 9807 if ((*code == LT || *code == LTU) 9808 && CONST_INT_P (*op1) && INTVAL (*op1) == 256) 9809 { 9810 *code = *code == LT ? LE : LEU; 9811 *op1 = GEN_INT (255); 9812 } 9813} 9814 9815/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ 9816 9817static void 9818alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 9819{ 9820 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17); 9821 9822 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; 9823 tree new_fenv_var, reload_fenv, restore_fnenv; 9824 tree update_call, atomic_feraiseexcept, hold_fnclex; 9825 9826 /* Assume OSF/1 compatible interfaces. 
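That is, the __ieee_get_fp_control and __ieee_set_fp_control entry points
called below; on any other ABI we return early and leave *hold, *clear and
*update untouched.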
*/ 9827 if (!TARGET_ABI_OSF) 9828 return; 9829 9830 /* Generate the equivalent of : 9831 unsigned long fenv_var; 9832 fenv_var = __ieee_get_fp_control (); 9833 9834 unsigned long masked_fenv; 9835 masked_fenv = fenv_var & mask; 9836 9837 __ieee_set_fp_control (masked_fenv); */ 9838 9839 fenv_var = create_tmp_var_raw (long_unsigned_type_node); 9840 get_fpscr 9841 = build_fn_decl ("__ieee_get_fp_control", 9842 build_function_type_list (long_unsigned_type_node, NULL)); 9843 set_fpscr 9844 = build_fn_decl ("__ieee_set_fp_control", 9845 build_function_type_list (void_type_node, NULL)); 9846 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK); 9847 ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var, 9848 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); 9849 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask); 9850 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); 9851 *hold = build2 (COMPOUND_EXPR, void_type_node, 9852 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), 9853 hold_fnclex); 9854 9855 /* Store the value of masked_fenv to clear the exceptions: 9856 __ieee_set_fp_control (masked_fenv); */ 9857 9858 *clear = build_call_expr (set_fpscr, 1, masked_fenv); 9859 9860 /* Generate the equivalent of : 9861 unsigned long new_fenv_var; 9862 new_fenv_var = __ieee_get_fp_control (); 9863 9864 __ieee_set_fp_control (fenv_var); 9865 9866 __atomic_feraiseexcept (new_fenv_var); */ 9867 9868 new_fenv_var = create_tmp_var_raw (long_unsigned_type_node); 9869 reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var, 9870 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); 9871 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); 9872 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 9873 update_call 9874 = build_call_expr (atomic_feraiseexcept, 1, 9875 fold_convert (integer_type_node, new_fenv_var)); 9876 *update = build2 (COMPOUND_EXPR, void_type_node, 9877 build2 (COMPOUND_EXPR, void_type_node, 9878 reload_fenv, restore_fnenv), update_call); 9879} 9880 9881/* Implement TARGET_HARD_REGNO_MODE_OK. On Alpha, the integer registers 9882 can hold any mode. The floating-point registers can hold 64-bit 9883 integers as well, but not smaller values. */ 9884 9885static bool 9886alpha_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 9887{ 9888 if (IN_RANGE (regno, 32, 62)) 9889 return (mode == SFmode 9890 || mode == DFmode 9891 || mode == DImode 9892 || mode == SCmode 9893 || mode == DCmode); 9894 return true; 9895} 9896 9897/* Implement TARGET_MODES_TIEABLE_P. This asymmetric test is true when 9898 MODE1 could be put in an FP register but MODE2 could not. */ 9899 9900static bool 9901alpha_modes_tieable_p (machine_mode mode1, machine_mode mode2) 9902{ 9903 return (alpha_hard_regno_mode_ok (32, mode1) 9904 ? alpha_hard_regno_mode_ok (32, mode2) 9905 : true); 9906} 9907 9908/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 9909 9910static bool 9911alpha_can_change_mode_class (machine_mode from, machine_mode to, 9912 reg_class_t rclass) 9913{ 9914 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to) 9915 || !reg_classes_intersect_p (FLOAT_REGS, rclass)); 9916} 9917 9918/* Initialize the GCC target structure. 
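Each #undef/#define pair below overrides one hook in TARGET_INITIALIZER;
hooks that are not overridden keep their defaults from target-def.h.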
*/ 9919#if TARGET_ABI_OPEN_VMS 9920# undef TARGET_ATTRIBUTE_TABLE 9921# define TARGET_ATTRIBUTE_TABLE vms_attribute_table 9922# undef TARGET_CAN_ELIMINATE 9923# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate 9924#endif 9925 9926#undef TARGET_IN_SMALL_DATA_P 9927#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p 9928 9929#undef TARGET_ASM_ALIGNED_HI_OP 9930#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" 9931#undef TARGET_ASM_ALIGNED_DI_OP 9932#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" 9933 9934/* Default unaligned ops are provided for ELF systems. To get unaligned 9935 data for non-ELF systems, we have to turn off auto alignment. */ 9936#if TARGET_ABI_OPEN_VMS 9937#undef TARGET_ASM_UNALIGNED_HI_OP 9938#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t" 9939#undef TARGET_ASM_UNALIGNED_SI_OP 9940#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t" 9941#undef TARGET_ASM_UNALIGNED_DI_OP 9942#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t" 9943#endif 9944 9945#undef TARGET_ASM_RELOC_RW_MASK 9946#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask 9947#undef TARGET_ASM_SELECT_RTX_SECTION 9948#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section 9949#undef TARGET_SECTION_TYPE_FLAGS 9950#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags 9951 9952#undef TARGET_ASM_FUNCTION_END_PROLOGUE 9953#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue 9954 9955#undef TARGET_INIT_LIBFUNCS 9956#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs 9957 9958#undef TARGET_LEGITIMIZE_ADDRESS 9959#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address 9960#undef TARGET_MODE_DEPENDENT_ADDRESS_P 9961#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p 9962 9963#undef TARGET_ASM_FILE_START 9964#define TARGET_ASM_FILE_START alpha_file_start 9965 9966#undef TARGET_SCHED_ADJUST_COST 9967#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost 9968#undef TARGET_SCHED_ISSUE_RATE 9969#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate 9970#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 9971#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 9972 alpha_multipass_dfa_lookahead 9973 9974#undef TARGET_HAVE_TLS 9975#define TARGET_HAVE_TLS HAVE_AS_TLS 9976 9977#undef TARGET_BUILTIN_DECL 9978#define TARGET_BUILTIN_DECL alpha_builtin_decl 9979#undef TARGET_INIT_BUILTINS 9980#define TARGET_INIT_BUILTINS alpha_init_builtins 9981#undef TARGET_EXPAND_BUILTIN 9982#define TARGET_EXPAND_BUILTIN alpha_expand_builtin 9983#undef TARGET_FOLD_BUILTIN 9984#define TARGET_FOLD_BUILTIN alpha_fold_builtin 9985#undef TARGET_GIMPLE_FOLD_BUILTIN 9986#define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin 9987 9988#undef TARGET_FUNCTION_OK_FOR_SIBCALL 9989#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall 9990#undef TARGET_CANNOT_COPY_INSN_P 9991#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p 9992#undef TARGET_LEGITIMATE_CONSTANT_P 9993#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p 9994#undef TARGET_CANNOT_FORCE_CONST_MEM 9995#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem 9996 9997#if TARGET_ABI_OSF 9998#undef TARGET_ASM_OUTPUT_MI_THUNK 9999#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf 10000#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 10001#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 10002#undef TARGET_STDARG_OPTIMIZE_HOOK 10003#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook 10004#endif 10005 10006#undef 
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND alpha_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p

/* Use 16-bit section anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET (-0x7fff - 1)
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS alpha_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE alpha_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE alpha_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG alpha_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD alpha_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED alpha_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE alpha_secondary_memory_needed_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE alpha_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  alpha_override_options_after_change

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE alpha_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv

#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK alpha_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P alpha_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS alpha_can_change_mode_class

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-alpha.h"
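/* Note: gt-alpha.h is generated by gengtype from the GTY(()) markers
   in this file; including it last supplies the garbage-collector root
   tables for the GC-managed statics declared above.  */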