1/* Subroutines used for code generation on the DEC Alpha.
2   Copyright (C) 1992-2015 Free Software Foundation, Inc.
3   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "rtl.h"
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
36#include "tree.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "calls.h"
40#include "varasm.h"
41#include "regs.h"
42#include "hard-reg-set.h"
43#include "insn-config.h"
44#include "conditions.h"
45#include "output.h"
46#include "insn-attr.h"
47#include "flags.h"
48#include "recog.h"
49#include "hashtab.h"
50#include "function.h"
51#include "statistics.h"
52#include "real.h"
53#include "fixed-value.h"
54#include "expmed.h"
55#include "dojump.h"
56#include "explow.h"
57#include "emit-rtl.h"
58#include "stmt.h"
59#include "expr.h"
60#include "insn-codes.h"
61#include "optabs.h"
62#include "reload.h"
63#include "obstack.h"
64#include "except.h"
65#include "diagnostic-core.h"
66#include "ggc.h"
67#include "tm_p.h"
68#include "target.h"
69#include "target-def.h"
70#include "common/common-target.h"
71#include "debug.h"
72#include "langhooks.h"
73#include "hash-map.h"
74#include "hash-table.h"
75#include "predict.h"
76#include "dominance.h"
77#include "cfg.h"
78#include "cfgrtl.h"
79#include "cfganal.h"
80#include "lcm.h"
81#include "cfgbuild.h"
82#include "cfgcleanup.h"
83#include "basic-block.h"
84#include "tree-ssa-alias.h"
85#include "internal-fn.h"
86#include "gimple-fold.h"
87#include "tree-eh.h"
88#include "gimple-expr.h"
89#include "is-a.h"
90#include "gimple.h"
91#include "tree-pass.h"
92#include "context.h"
93#include "pass_manager.h"
94#include "gimple-iterator.h"
95#include "gimplify.h"
96#include "gimple-ssa.h"
97#include "stringpool.h"
98#include "tree-ssanames.h"
99#include "tree-stdarg.h"
100#include "tm-constrs.h"
101#include "df.h"
102#include "libfuncs.h"
103#include "opts.h"
104#include "params.h"
105#include "builtins.h"
106#include "rtl-iter.h"
107
108/* Specify which cpu to schedule for.  */
109enum processor_type alpha_tune;
110
111/* Which cpu we're generating code for.  */
112enum processor_type alpha_cpu;
113
114static const char * const alpha_cpu_name[] =
115{
116  "ev4", "ev5", "ev6"
117};
118
119/* Specify how accurate floating-point traps need to be.  */
120
121enum alpha_trap_precision alpha_tp;
122
123/* Specify the floating-point rounding mode.  */
124
125enum alpha_fp_rounding_mode alpha_fprm;
126
127/* Specify which things cause traps.  */
128
129enum alpha_fp_trap_mode alpha_fptm;
130
/* Nonzero while inside a function, because the Alpha assembler can't
   handle .file directives inside of functions.  */
133
134static int inside_function = FALSE;
135
136/* The number of cycles of latency we should assume on memory reads.  */
137
138int alpha_memory_latency = 3;
139
140/* Whether the function needs the GP.  */
141
142static int alpha_function_needs_gp;
143
144/* The assembler name of the current function.  */
145
146static const char *alpha_fnname;
147
148/* The next explicit relocation sequence number.  */
149extern GTY(()) int alpha_next_sequence_number;
150int alpha_next_sequence_number = 1;
151
152/* The literal and gpdisp sequence numbers for this insn, as printed
153   by %# and %* respectively.  */
154extern GTY(()) int alpha_this_literal_sequence_number;
155extern GTY(()) int alpha_this_gpdisp_sequence_number;
156int alpha_this_literal_sequence_number;
157int alpha_this_gpdisp_sequence_number;
158
159/* Costs of various operations on the different architectures.  */
160
161struct alpha_rtx_cost_data
162{
163  unsigned char fp_add;
164  unsigned char fp_mult;
165  unsigned char fp_div_sf;
166  unsigned char fp_div_df;
167  unsigned char int_mult_si;
168  unsigned char int_mult_di;
169  unsigned char int_shift;
170  unsigned char int_cmov;
171  unsigned short int_div;
172};
173
174static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
175{
176  { /* EV4 */
177    COSTS_N_INSNS (6),		/* fp_add */
178    COSTS_N_INSNS (6),		/* fp_mult */
179    COSTS_N_INSNS (34),		/* fp_div_sf */
180    COSTS_N_INSNS (63),		/* fp_div_df */
181    COSTS_N_INSNS (23),		/* int_mult_si */
182    COSTS_N_INSNS (23),		/* int_mult_di */
183    COSTS_N_INSNS (2),		/* int_shift */
184    COSTS_N_INSNS (2),		/* int_cmov */
185    COSTS_N_INSNS (97),		/* int_div */
186  },
187  { /* EV5 */
188    COSTS_N_INSNS (4),		/* fp_add */
189    COSTS_N_INSNS (4),		/* fp_mult */
190    COSTS_N_INSNS (15),		/* fp_div_sf */
191    COSTS_N_INSNS (22),		/* fp_div_df */
192    COSTS_N_INSNS (8),		/* int_mult_si */
193    COSTS_N_INSNS (12),		/* int_mult_di */
194    COSTS_N_INSNS (1) + 1,	/* int_shift */
195    COSTS_N_INSNS (1),		/* int_cmov */
196    COSTS_N_INSNS (83),		/* int_div */
197  },
198  { /* EV6 */
199    COSTS_N_INSNS (4),		/* fp_add */
200    COSTS_N_INSNS (4),		/* fp_mult */
201    COSTS_N_INSNS (12),		/* fp_div_sf */
202    COSTS_N_INSNS (15),		/* fp_div_df */
203    COSTS_N_INSNS (7),		/* int_mult_si */
204    COSTS_N_INSNS (7),		/* int_mult_di */
205    COSTS_N_INSNS (1),		/* int_shift */
206    COSTS_N_INSNS (2),		/* int_cmov */
207    COSTS_N_INSNS (86),		/* int_div */
208  },
209};
210
211/* Similar but tuned for code size instead of execution latency.  The
212   extra +N is fractional cost tuning based on latency.  It's used to
213   encourage use of cheaper insns like shift, but only if there's just
214   one of them.  */
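/* Note that COSTS_N_INSNS (N) is N * 4, so COSTS_N_INSNS (1) + 1 is
   still strictly cheaper than COSTS_N_INSNS (2).  */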
215
216static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
217{
218  COSTS_N_INSNS (1),		/* fp_add */
219  COSTS_N_INSNS (1),		/* fp_mult */
220  COSTS_N_INSNS (1),		/* fp_div_sf */
221  COSTS_N_INSNS (1) + 1,	/* fp_div_df */
222  COSTS_N_INSNS (1) + 1,	/* int_mult_si */
223  COSTS_N_INSNS (1) + 2,	/* int_mult_di */
224  COSTS_N_INSNS (1),		/* int_shift */
225  COSTS_N_INSNS (1),		/* int_cmov */
226  COSTS_N_INSNS (6),		/* int_div */
227};
228
229/* Get the number of args of a function in one of two ways.  */
230#if TARGET_ABI_OPEN_VMS
231#define NUM_ARGS crtl->args.info.num_args
232#else
233#define NUM_ARGS crtl->args.info
234#endif
235
236#define REG_PV 27
237#define REG_RA 26
238
239/* Declarations of static functions.  */
240static struct machine_function *alpha_init_machine_status (void);
241static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
242static void alpha_handle_trap_shadows (void);
243static void alpha_align_insns (void);
244static void alpha_override_options_after_change (void);
245
246#if TARGET_ABI_OPEN_VMS
247static void alpha_write_linkage (FILE *, const char *);
248static bool vms_valid_pointer_mode (machine_mode);
249#else
250#define vms_patch_builtins()  gcc_unreachable()
251#endif
252
253static unsigned int
254rest_of_handle_trap_shadows (void)
255{
256  alpha_handle_trap_shadows ();
257  return 0;
258}
259
260namespace {
261
262const pass_data pass_data_handle_trap_shadows =
263{
264  RTL_PASS,
265  "trap_shadows",			/* name */
266  OPTGROUP_NONE,			/* optinfo_flags */
267  TV_NONE,				/* tv_id */
268  0,					/* properties_required */
269  0,					/* properties_provided */
270  0,					/* properties_destroyed */
271  0,					/* todo_flags_start */
272  TODO_df_finish,			/* todo_flags_finish */
273};
274
275class pass_handle_trap_shadows : public rtl_opt_pass
276{
277public:
  pass_handle_trap_shadows (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_handle_trap_shadows, ctxt)
280  {}
281
282  /* opt_pass methods: */
283  virtual bool gate (function *)
284    {
285      return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
286    }
287
288  virtual unsigned int execute (function *)
289    {
290      return rest_of_handle_trap_shadows ();
291    }
292
293}; // class pass_handle_trap_shadows
294
295} // anon namespace
296
297rtl_opt_pass *
298make_pass_handle_trap_shadows (gcc::context *ctxt)
299{
300  return new pass_handle_trap_shadows (ctxt);
301}
302
303static unsigned int
304rest_of_align_insns (void)
305{
306  alpha_align_insns ();
307  return 0;
308}
309
310namespace {
311
312const pass_data pass_data_align_insns =
313{
314  RTL_PASS,
315  "align_insns",			/* name */
316  OPTGROUP_NONE,			/* optinfo_flags */
317  TV_NONE,				/* tv_id */
318  0,					/* properties_required */
319  0,					/* properties_provided */
320  0,					/* properties_destroyed */
321  0,					/* todo_flags_start */
322  TODO_df_finish,			/* todo_flags_finish */
323};
324
325class pass_align_insns : public rtl_opt_pass
326{
327public:
  pass_align_insns (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_align_insns, ctxt)
330  {}
331
332  /* opt_pass methods: */
333  virtual bool gate (function *)
334    {
335      /* Due to the number of extra trapb insns, don't bother fixing up
336	 alignment when trap precision is instruction.  Moreover, we can
337	 only do our job when sched2 is run.  */
338      return ((alpha_tune == PROCESSOR_EV4
339	       || alpha_tune == PROCESSOR_EV5)
340	      && optimize && !optimize_size
341	      && alpha_tp != ALPHA_TP_INSN
342	      && flag_schedule_insns_after_reload);
343    }
344
345  virtual unsigned int execute (function *)
346    {
347      return rest_of_align_insns ();
348    }
349
350}; // class pass_align_insns
351
352} // anon namespace
353
354rtl_opt_pass *
355make_pass_align_insns (gcc::context *ctxt)
356{
357  return new pass_align_insns (ctxt);
358}
359
360#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
361/* Implement TARGET_MANGLE_TYPE.  */
362
363static const char *
364alpha_mangle_type (const_tree type)
365{
366  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
367      && TARGET_LONG_DOUBLE_128)
368    return "g";
369
370  /* For all other types, use normal C++ mangling.  */
371  return NULL;
372}
373#endif
374
375/* Parse target option strings.  */
376
377static void
378alpha_option_override (void)
379{
380  static const struct cpu_table {
381    const char *const name;
382    const enum processor_type processor;
383    const int flags;
384    const unsigned short line_size; /* in bytes */
385    const unsigned short l1_size;   /* in kb.  */
386    const unsigned short l2_size;   /* in kb.  */
387  } cpu_table[] = {
388    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
389       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
390       had 64k to 8M 8-byte direct Bcache.  */
391    { "ev4",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
392    { "21064",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
393    { "ev45",	PROCESSOR_EV4, 0, 32, 16, 16*1024 },
394
395    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
396       and 1M to 16M 64 byte L3 (not modeled).
397       PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
398       PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache.  */
399    { "ev5",	PROCESSOR_EV5, 0, 32, 8, 96 },
400    { "21164",	PROCESSOR_EV5, 0, 32, 8, 96 },
401    { "ev56",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
402    { "21164a",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
403    { "pca56",	PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
404    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
405    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
406
407    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
408    { "ev6",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
409    { "21264",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
410    { "ev67",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
411      64, 64, 16*1024 },
412    { "21264a",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
413      64, 64, 16*1024 }
414  };
415
416  opt_pass *pass_handle_trap_shadows = make_pass_handle_trap_shadows (g);
417  struct register_pass_info handle_trap_shadows_info
418    = { pass_handle_trap_shadows, "eh_ranges",
419	1, PASS_POS_INSERT_AFTER
420      };
421
422  opt_pass *pass_align_insns = make_pass_align_insns (g);
423  struct register_pass_info align_insns_info
424    = { pass_align_insns, "shorten",
425	1, PASS_POS_INSERT_BEFORE
426      };
427
428  int const ct_size = ARRAY_SIZE (cpu_table);
429  int line_size = 0, l1_size = 0, l2_size = 0;
430  int i;
431
432#ifdef SUBTARGET_OVERRIDE_OPTIONS
433  SUBTARGET_OVERRIDE_OPTIONS;
434#endif
435
436  /* Default to full IEEE compliance mode for Go language.  */
437  if (strcmp (lang_hooks.name, "GNU Go") == 0
438      && !(target_flags_explicit & MASK_IEEE))
439    target_flags |= MASK_IEEE;
440
441  alpha_fprm = ALPHA_FPRM_NORM;
442  alpha_tp = ALPHA_TP_PROG;
443  alpha_fptm = ALPHA_FPTM_N;
444
445  if (TARGET_IEEE)
446    {
447      alpha_tp = ALPHA_TP_INSN;
448      alpha_fptm = ALPHA_FPTM_SU;
449    }
450  if (TARGET_IEEE_WITH_INEXACT)
451    {
452      alpha_tp = ALPHA_TP_INSN;
453      alpha_fptm = ALPHA_FPTM_SUI;
454    }
455
456  if (alpha_tp_string)
457    {
458      if (! strcmp (alpha_tp_string, "p"))
459	alpha_tp = ALPHA_TP_PROG;
460      else if (! strcmp (alpha_tp_string, "f"))
461	alpha_tp = ALPHA_TP_FUNC;
462      else if (! strcmp (alpha_tp_string, "i"))
463	alpha_tp = ALPHA_TP_INSN;
464      else
465	error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
466    }
467
468  if (alpha_fprm_string)
469    {
470      if (! strcmp (alpha_fprm_string, "n"))
471	alpha_fprm = ALPHA_FPRM_NORM;
472      else if (! strcmp (alpha_fprm_string, "m"))
473	alpha_fprm = ALPHA_FPRM_MINF;
474      else if (! strcmp (alpha_fprm_string, "c"))
475	alpha_fprm = ALPHA_FPRM_CHOP;
476      else if (! strcmp (alpha_fprm_string,"d"))
477	alpha_fprm = ALPHA_FPRM_DYN;
478      else
479	error ("bad value %qs for -mfp-rounding-mode switch",
480	       alpha_fprm_string);
481    }
482
483  if (alpha_fptm_string)
484    {
485      if (strcmp (alpha_fptm_string, "n") == 0)
486	alpha_fptm = ALPHA_FPTM_N;
487      else if (strcmp (alpha_fptm_string, "u") == 0)
488	alpha_fptm = ALPHA_FPTM_U;
489      else if (strcmp (alpha_fptm_string, "su") == 0)
490	alpha_fptm = ALPHA_FPTM_SU;
491      else if (strcmp (alpha_fptm_string, "sui") == 0)
492	alpha_fptm = ALPHA_FPTM_SUI;
493      else
494	error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
495    }
496
497  if (alpha_cpu_string)
498    {
499      for (i = 0; i < ct_size; i++)
500	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
501	  {
502	    alpha_tune = alpha_cpu = cpu_table[i].processor;
503	    line_size = cpu_table[i].line_size;
504	    l1_size = cpu_table[i].l1_size;
505	    l2_size = cpu_table[i].l2_size;
506	    target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
507	    target_flags |= cpu_table[i].flags;
508	    break;
509	  }
510      if (i == ct_size)
511	error ("bad value %qs for -mcpu switch", alpha_cpu_string);
512    }
513
514  if (alpha_tune_string)
515    {
516      for (i = 0; i < ct_size; i++)
517	if (! strcmp (alpha_tune_string, cpu_table [i].name))
518	  {
519	    alpha_tune = cpu_table[i].processor;
520	    line_size = cpu_table[i].line_size;
521	    l1_size = cpu_table[i].l1_size;
522	    l2_size = cpu_table[i].l2_size;
523	    break;
524	  }
525      if (i == ct_size)
526	error ("bad value %qs for -mtune switch", alpha_tune_string);
527    }
528
529  if (line_size)
530    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
531			   global_options.x_param_values,
532			   global_options_set.x_param_values);
533  if (l1_size)
534    maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
535			   global_options.x_param_values,
536			   global_options_set.x_param_values);
537  if (l2_size)
538    maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
539			   global_options.x_param_values,
540			   global_options_set.x_param_values);
541
542  /* Do some sanity checks on the above options.  */
543
544  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
545      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
546    {
547      warning (0, "fp software completion requires -mtrap-precision=i");
548      alpha_tp = ALPHA_TP_INSN;
549    }
550
551  if (alpha_cpu == PROCESSOR_EV6)
552    {
553      /* Except for EV6 pass 1 (not released), we always have precise
554	 arithmetic traps.  Which means we can do software completion
555	 without minding trap shadows.  */
556      alpha_tp = ALPHA_TP_PROG;
557    }
558
559  if (TARGET_FLOAT_VAX)
560    {
561      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
562	{
563	  warning (0, "rounding mode not supported for VAX floats");
564	  alpha_fprm = ALPHA_FPRM_NORM;
565	}
566      if (alpha_fptm == ALPHA_FPTM_SUI)
567	{
568	  warning (0, "trap mode not supported for VAX floats");
569	  alpha_fptm = ALPHA_FPTM_SU;
570	}
571      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
572	warning (0, "128-bit long double not supported for VAX floats");
573      target_flags &= ~MASK_LONG_DOUBLE_128;
574    }
575
576  {
577    char *end;
578    int lat;
579
580    if (!alpha_mlat_string)
581      alpha_mlat_string = "L1";
582
583    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
584	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
585      ;
586    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
587	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
588	     && alpha_mlat_string[2] == '\0')
589      {
590	static int const cache_latency[][4] =
591	{
592	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
593	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
594	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
595	};
596
597	lat = alpha_mlat_string[1] - '0';
598	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
599	  {
600	    warning (0, "L%d cache latency unknown for %s",
601		     lat, alpha_cpu_name[alpha_tune]);
602	    lat = 3;
603	  }
604	else
605	  lat = cache_latency[alpha_tune][lat-1];
606      }
607    else if (! strcmp (alpha_mlat_string, "main"))
608      {
609	/* Most current memories have about 370ns latency.  This is
610	   a reasonable guess for a fast cpu.  */
611	lat = 150;
612      }
613    else
614      {
615	warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
616	lat = 3;
617      }
618
619    alpha_memory_latency = lat;
620  }
621
622  /* Default the definition of "small data" to 8 bytes.  */
623  if (!global_options_set.x_g_switch_value)
624    g_switch_value = 8;
625
626  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
627  if (flag_pic == 1)
628    target_flags |= MASK_SMALL_DATA;
629  else if (flag_pic == 2)
630    target_flags &= ~MASK_SMALL_DATA;
631
632  alpha_override_options_after_change ();
633
634  /* Register variables and functions with the garbage collector.  */
635
636  /* Set up function hooks.  */
637  init_machine_status = alpha_init_machine_status;
638
639  /* Tell the compiler when we're using VAX floating point.  */
640  if (TARGET_FLOAT_VAX)
641    {
642      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
643      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
644      REAL_MODE_FORMAT (TFmode) = NULL;
645    }
646
647#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
648  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
649    target_flags |= MASK_LONG_DOUBLE_128;
650#endif
651
652  /* This needs to be done at start up.  It's convenient to do it here.  */
653  register_pass (&handle_trap_shadows_info);
654  register_pass (&align_insns_info);
655}
656
657/* Implement targetm.override_options_after_change.  */
658
659static void
660alpha_override_options_after_change (void)
661{
662  /* Align labels and loops for optimal branching.  */
663  /* ??? Kludge these by not doing anything if we don't optimize.  */
664  if (optimize > 0)
665    {
666      if (align_loops <= 0)
667	align_loops = 16;
668      if (align_jumps <= 0)
669	align_jumps = 16;
670    }
671  if (align_functions <= 0)
672    align_functions = 16;
673}
674
/* Return 1 if VALUE is a mask in which each byte is either all zeros
   or all ones.  */
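/* Such masks correspond to the byte-select masks taken by the ZAP and
   ZAPNOT instructions; e.g. 0xffff00ff qualifies, while 0x00ff0f00
   does not (the 0x0f byte is neither all zeros nor all ones).  */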
676
677int
678zap_mask (HOST_WIDE_INT value)
679{
680  int i;
681
682  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
683       i++, value >>= 8)
684    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
685      return 0;
686
687  return 1;
688}
689
690/* Return true if OP is valid for a particular TLS relocation.
691   We are already guaranteed that OP is a CONST.  */
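/* That is, OP has the form (const (unspec [(symbol_ref ...)] UNSPEC)),
   and the symbol's TLS model determines which UNSPEC/SIZE pairs are
   acceptable.  */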
692
693int
694tls_symbolic_operand_1 (rtx op, int size, int unspec)
695{
696  op = XEXP (op, 0);
697
698  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
699    return 0;
700  op = XVECEXP (op, 0, 0);
701
702  if (GET_CODE (op) != SYMBOL_REF)
703    return 0;
704
705  switch (SYMBOL_REF_TLS_MODEL (op))
706    {
707    case TLS_MODEL_LOCAL_DYNAMIC:
708      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
709    case TLS_MODEL_INITIAL_EXEC:
710      return unspec == UNSPEC_TPREL && size == 64;
711    case TLS_MODEL_LOCAL_EXEC:
712      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
713    default:
714      gcc_unreachable ();
715    }
716}
717
718/* Used by aligned_memory_operand and unaligned_memory_operand to
719   resolve what reload is going to do with OP if it's a register.  */
720
721rtx
722resolve_reload_operand (rtx op)
723{
724  if (reload_in_progress)
725    {
726      rtx tmp = op;
727      if (GET_CODE (tmp) == SUBREG)
728	tmp = SUBREG_REG (tmp);
729      if (REG_P (tmp)
730	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
731	{
732	  op = reg_equiv_memory_loc (REGNO (tmp));
733	  if (op == 0)
734	    return 0;
735	}
736    }
737  return op;
738}
739
/* The set of scalar modes supported differs from the default
   check-what-C-supports version in that TFmode is sometimes available
   even when long double indicates only DFmode.  */
743
744static bool
745alpha_scalar_mode_supported_p (machine_mode mode)
746{
747  switch (mode)
748    {
749    case QImode:
750    case HImode:
751    case SImode:
752    case DImode:
753    case TImode: /* via optabs.c */
754      return true;
755
756    case SFmode:
757    case DFmode:
758      return true;
759
760    case TFmode:
761      return TARGET_HAS_XFLOATING_LIBS;
762
763    default:
764      return false;
765    }
766}
767
768/* Alpha implements a couple of integer vector mode operations when
769   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
770   which allows the vectorizer to operate on e.g. move instructions,
771   or when expand_vector_operations can do something useful.  */
772
773static bool
774alpha_vector_mode_supported_p (machine_mode mode)
775{
776  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
777}
778
779/* Return 1 if this function can directly return via $26.  */
780
781int
782direct_return (void)
783{
784  return (TARGET_ABI_OSF
785	  && reload_completed
786	  && alpha_sa_size () == 0
787	  && get_frame_size () == 0
788	  && crtl->outgoing_args_size == 0
789	  && crtl->args.pretend_args_size == 0);
790}
791
792/* Return the TLS model to use for SYMBOL.  */
793
794static enum tls_model
795tls_symbolic_operand_type (rtx symbol)
796{
797  enum tls_model model;
798
799  if (GET_CODE (symbol) != SYMBOL_REF)
800    return TLS_MODEL_NONE;
801  model = SYMBOL_REF_TLS_MODEL (symbol);
802
803  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
804  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
805    model = TLS_MODEL_INITIAL_EXEC;
806
807  return model;
808}
809
810/* Return true if the function DECL will share the same GP as any
811   function in the current unit of translation.  */
812
813static bool
814decl_has_samegp (const_tree decl)
815{
816  /* Functions that are not local can be overridden, and thus may
817     not share the same gp.  */
818  if (!(*targetm.binds_local_p) (decl))
819    return false;
820
821  /* If -msmall-data is in effect, assume that there is only one GP
822     for the module, and so any local symbol has this property.  We
823     need explicit relocations to be able to enforce this for symbols
824     not defined in this unit of translation, however.  */
825  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
826    return true;
827
828  /* Functions that are not external are defined in this UoT.  */
829  /* ??? Irritatingly, static functions not yet emitted are still
830     marked "external".  Apply this to non-static functions only.  */
831  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
832}
833
834/* Return true if EXP should be placed in the small data section.  */
835
836static bool
837alpha_in_small_data_p (const_tree exp)
838{
839  /* We want to merge strings, so we never consider them small data.  */
840  if (TREE_CODE (exp) == STRING_CST)
841    return false;
842
843  /* Functions are never in the small data area.  Duh.  */
844  if (TREE_CODE (exp) == FUNCTION_DECL)
845    return false;
846
847  /* COMMON symbols are never small data.  */
848  if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
849    return false;
850
851  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
852    {
853      const char *section = DECL_SECTION_NAME (exp);
854      if (strcmp (section, ".sdata") == 0
855	  || strcmp (section, ".sbss") == 0)
856	return true;
857    }
858  else
859    {
860      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
861
862      /* If this is an incomplete type with size 0, then we can't put it
863	 in sdata because it might be too big when completed.  */
864      if (size > 0 && size <= g_switch_value)
865	return true;
866    }
867
868  return false;
869}
870
871#if TARGET_ABI_OPEN_VMS
872static bool
873vms_valid_pointer_mode (machine_mode mode)
874{
875  return (mode == SImode || mode == DImode);
876}
877
878static bool
879alpha_linkage_symbol_p (const char *symname)
880{
881  int symlen = strlen (symname);
882
883  if (symlen > 4)
884    return strcmp (&symname [symlen - 4], "..lk") == 0;
885
886  return false;
887}
888
889#define LINKAGE_SYMBOL_REF_P(X) \
890  ((GET_CODE (X) == SYMBOL_REF   \
891    && alpha_linkage_symbol_p (XSTR (X, 0))) \
892   || (GET_CODE (X) == CONST                 \
893       && GET_CODE (XEXP (X, 0)) == PLUS     \
894       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
895       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
896#endif
897
898/* legitimate_address_p recognizes an RTL expression that is a valid
899   memory address for an instruction.  The MODE argument is the
900   machine mode for the MEM expression that wants to use this address.
901
902   For Alpha, we have either a constant address or the sum of a
903   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
905   low-order three bits; this is an "unaligned" access.  */
906
907static bool
908alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
909{
910  /* If this is an ldq_u type address, discard the outer AND.  */
911  if (mode == DImode
912      && GET_CODE (x) == AND
913      && CONST_INT_P (XEXP (x, 1))
914      && INTVAL (XEXP (x, 1)) == -8)
915    x = XEXP (x, 0);
916
917  /* Discard non-paradoxical subregs.  */
918  if (GET_CODE (x) == SUBREG
919      && (GET_MODE_SIZE (GET_MODE (x))
920	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
921    x = SUBREG_REG (x);
922
923  /* Unadorned general registers are valid.  */
924  if (REG_P (x)
925      && (strict
926	  ? STRICT_REG_OK_FOR_BASE_P (x)
927	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
928    return true;
929
930  /* Constant addresses (i.e. +/- 32k) are valid.  */
931  if (CONSTANT_ADDRESS_P (x))
932    return true;
933
934#if TARGET_ABI_OPEN_VMS
935  if (LINKAGE_SYMBOL_REF_P (x))
936    return true;
937#endif
938
939  /* Register plus a small constant offset is valid.  */
940  if (GET_CODE (x) == PLUS)
941    {
942      rtx ofs = XEXP (x, 1);
943      x = XEXP (x, 0);
944
945      /* Discard non-paradoxical subregs.  */
946      if (GET_CODE (x) == SUBREG
947          && (GET_MODE_SIZE (GET_MODE (x))
948	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
949	x = SUBREG_REG (x);
950
951      if (REG_P (x))
952	{
953	  if (! strict
954	      && NONSTRICT_REG_OK_FP_BASE_P (x)
955	      && CONST_INT_P (ofs))
956	    return true;
957	  if ((strict
958	       ? STRICT_REG_OK_FOR_BASE_P (x)
959	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
960	      && CONSTANT_ADDRESS_P (ofs))
961	    return true;
962	}
963    }
964
965  /* If we're managing explicit relocations, LO_SUM is valid, as are small
966     data symbols.  Avoid explicit relocations of modes larger than word
967     mode since i.e. $LC0+8($1) can fold around +/- 32k offset.  */
968  else if (TARGET_EXPLICIT_RELOCS
969	   && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
970    {
971      if (small_symbolic_operand (x, Pmode))
972	return true;
973
974      if (GET_CODE (x) == LO_SUM)
975	{
976	  rtx ofs = XEXP (x, 1);
977	  x = XEXP (x, 0);
978
979	  /* Discard non-paradoxical subregs.  */
980	  if (GET_CODE (x) == SUBREG
981	      && (GET_MODE_SIZE (GET_MODE (x))
982		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
983	    x = SUBREG_REG (x);
984
985	  /* Must have a valid base register.  */
986	  if (! (REG_P (x)
987		 && (strict
988		     ? STRICT_REG_OK_FOR_BASE_P (x)
989		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
990	    return false;
991
992	  /* The symbol must be local.  */
993	  if (local_symbolic_operand (ofs, Pmode)
994	      || dtp32_symbolic_operand (ofs, Pmode)
995	      || tp32_symbolic_operand (ofs, Pmode))
996	    return true;
997	}
998    }
999
1000  return false;
1001}
1002
1003/* Build the SYMBOL_REF for __tls_get_addr.  */
1004
1005static GTY(()) rtx tls_get_addr_libfunc;
1006
1007static rtx
1008get_tls_get_addr (void)
1009{
1010  if (!tls_get_addr_libfunc)
1011    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1012  return tls_get_addr_libfunc;
1013}
1014
1015/* Try machine-dependent ways of modifying an illegitimate address
1016   to be legitimate.  If we find one, return the new, valid address.  */
1017
1018static rtx
1019alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
1020{
1021  HOST_WIDE_INT addend;
1022
1023  /* If the address is (plus reg const_int) and the CONST_INT is not a
1024     valid offset, compute the high part of the constant and add it to
1025     the register.  Then our address is (plus temp low-part-const).  */
1026  if (GET_CODE (x) == PLUS
1027      && REG_P (XEXP (x, 0))
1028      && CONST_INT_P (XEXP (x, 1))
1029      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
1030    {
1031      addend = INTVAL (XEXP (x, 1));
1032      x = XEXP (x, 0);
1033      goto split_addend;
1034    }
1035
1036  /* If the address is (const (plus FOO const_int)), find the low-order
1037     part of the CONST_INT.  Then load FOO plus any high-order part of the
1038     CONST_INT into a register.  Our address is (plus reg low-part-const).
1039     This is done to reduce the number of GOT entries.  */
1040  if (can_create_pseudo_p ()
1041      && GET_CODE (x) == CONST
1042      && GET_CODE (XEXP (x, 0)) == PLUS
1043      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
1044    {
1045      addend = INTVAL (XEXP (XEXP (x, 0), 1));
1046      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
1047      goto split_addend;
1048    }
1049
1050  /* If we have a (plus reg const), emit the load as in (2), then add
1051     the two registers, and finally generate (plus reg low-part-const) as
1052     our address.  */
1053  if (can_create_pseudo_p ()
1054      && GET_CODE (x) == PLUS
1055      && REG_P (XEXP (x, 0))
1056      && GET_CODE (XEXP (x, 1)) == CONST
1057      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1058      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1059    {
1060      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1061      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1062			       XEXP (XEXP (XEXP (x, 1), 0), 0),
1063			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
1064      goto split_addend;
1065    }
1066
1067  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1068     Avoid modes larger than word mode since i.e. $LC0+8($1) can fold
1069     around +/- 32k offset.  */
1070  if (TARGET_EXPLICIT_RELOCS
1071      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1072      && symbolic_operand (x, Pmode))
1073    {
1074      rtx r0, r16, eqv, tga, tp, insn, dest, seq;
1075
1076      switch (tls_symbolic_operand_type (x))
1077	{
1078	case TLS_MODEL_NONE:
1079	  break;
1080
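	/* For the dynamic models below we call __tls_get_addr; the call
	   is marked RTL_CONST_CALL_P and wrapped with emit_libcall_block
	   so the address computation can be CSEd across uses.  */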
1081	case TLS_MODEL_GLOBAL_DYNAMIC:
1082	  start_sequence ();
1083
1084	  r0 = gen_rtx_REG (Pmode, 0);
1085	  r16 = gen_rtx_REG (Pmode, 16);
1086	  tga = get_tls_get_addr ();
1087	  dest = gen_reg_rtx (Pmode);
1088	  seq = GEN_INT (alpha_next_sequence_number++);
1089
1090	  emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1091	  insn = gen_call_value_osf_tlsgd (r0, tga, seq);
1092	  insn = emit_call_insn (insn);
1093	  RTL_CONST_CALL_P (insn) = 1;
1094	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1095
1096          insn = get_insns ();
1097	  end_sequence ();
1098
1099	  emit_libcall_block (insn, dest, r0, x);
1100	  return dest;
1101
1102	case TLS_MODEL_LOCAL_DYNAMIC:
1103	  start_sequence ();
1104
1105	  r0 = gen_rtx_REG (Pmode, 0);
1106	  r16 = gen_rtx_REG (Pmode, 16);
1107	  tga = get_tls_get_addr ();
1108	  scratch = gen_reg_rtx (Pmode);
1109	  seq = GEN_INT (alpha_next_sequence_number++);
1110
1111	  emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1112	  insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1113	  insn = emit_call_insn (insn);
1114	  RTL_CONST_CALL_P (insn) = 1;
1115	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1116
1117          insn = get_insns ();
1118	  end_sequence ();
1119
1120	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1121				UNSPEC_TLSLDM_CALL);
1122	  emit_libcall_block (insn, scratch, r0, eqv);
1123
1124	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1125	  eqv = gen_rtx_CONST (Pmode, eqv);
1126
1127	  if (alpha_tls_size == 64)
1128	    {
1129	      dest = gen_reg_rtx (Pmode);
1130	      emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1131	      emit_insn (gen_adddi3 (dest, dest, scratch));
1132	      return dest;
1133	    }
1134	  if (alpha_tls_size == 32)
1135	    {
1136	      insn = gen_rtx_HIGH (Pmode, eqv);
1137	      insn = gen_rtx_PLUS (Pmode, scratch, insn);
1138	      scratch = gen_reg_rtx (Pmode);
1139	      emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1140	    }
1141	  return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1142
1143	case TLS_MODEL_INITIAL_EXEC:
1144	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1145	  eqv = gen_rtx_CONST (Pmode, eqv);
1146	  tp = gen_reg_rtx (Pmode);
1147	  scratch = gen_reg_rtx (Pmode);
1148	  dest = gen_reg_rtx (Pmode);
1149
1150	  emit_insn (gen_get_thread_pointerdi (tp));
1151	  emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1152	  emit_insn (gen_adddi3 (dest, tp, scratch));
1153	  return dest;
1154
1155	case TLS_MODEL_LOCAL_EXEC:
1156	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1157	  eqv = gen_rtx_CONST (Pmode, eqv);
1158	  tp = gen_reg_rtx (Pmode);
1159
1160	  emit_insn (gen_get_thread_pointerdi (tp));
1161	  if (alpha_tls_size == 32)
1162	    {
1163	      insn = gen_rtx_HIGH (Pmode, eqv);
1164	      insn = gen_rtx_PLUS (Pmode, tp, insn);
1165	      tp = gen_reg_rtx (Pmode);
1166	      emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1167	    }
1168	  return gen_rtx_LO_SUM (Pmode, tp, eqv);
1169
1170	default:
1171	  gcc_unreachable ();
1172	}
1173
1174      if (local_symbolic_operand (x, Pmode))
1175	{
1176	  if (small_symbolic_operand (x, Pmode))
1177	    return x;
1178	  else
1179	    {
1180	      if (can_create_pseudo_p ())
1181	        scratch = gen_reg_rtx (Pmode);
1182	      emit_insn (gen_rtx_SET (VOIDmode, scratch,
1183				      gen_rtx_HIGH (Pmode, x)));
1184	      return gen_rtx_LO_SUM (Pmode, scratch, x);
1185	    }
1186	}
1187    }
1188
1189  return NULL;
1190
1191 split_addend:
1192  {
1193    HOST_WIDE_INT low, high;
1194
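    /* Split the addend into a sign-extended 16-bit low part (what an
       lda will add) and a sign-extended 32-bit high part (handled by
       ldah); any remainder is added separately.  For example,
       0x12348765 splits into low = -0x789b and high = 0x12350000.  */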
1195    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1196    addend -= low;
1197    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1198    addend -= high;
1199
1200    if (addend)
1201      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1202			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
1203			       1, OPTAB_LIB_WIDEN);
1204    if (high)
1205      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1206			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
1207			       1, OPTAB_LIB_WIDEN);
1208
1209    return plus_constant (Pmode, x, low);
1210  }
1211}
1212
1213
1214/* Try machine-dependent ways of modifying an illegitimate address
1215   to be legitimate.  Return X or the new, valid address.  */
1216
1217static rtx
1218alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1219			  machine_mode mode)
1220{
1221  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1222  return new_x ? new_x : x;
1223}
1224
1225/* Return true if ADDR has an effect that depends on the machine mode it
1226   is used for.  On the Alpha this is true only for the unaligned modes.
1227   We can simplify the test since we know that the address must be valid.  */
1228
1229static bool
1230alpha_mode_dependent_address_p (const_rtx addr,
1231				addr_space_t as ATTRIBUTE_UNUSED)
1232{
1233  return GET_CODE (addr) == AND;
1234}
1235
1236/* Primarily this is required for TLS symbols, but given that our move
1237   patterns *ought* to be able to handle any symbol at any time, we
1238   should never be spilling symbolic operands to the constant pool, ever.  */
1239
1240static bool
1241alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1242{
1243  enum rtx_code code = GET_CODE (x);
1244  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1245}
1246
1247/* We do not allow indirect calls to be optimized into sibling calls, nor
1248   can we allow a call to a function with a different GP to be optimized
1249   into a sibcall.  */
1250
1251static bool
1252alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1253{
1254  /* Can't do indirect tail calls, since we don't know if the target
1255     uses the same GP.  */
1256  if (!decl)
1257    return false;
1258
1259  /* Otherwise, we can make a tail call if the target function shares
1260     the same GP.  */
1261  return decl_has_samegp (decl);
1262}
1263
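/* Return true if X contains a small-data symbolic operand that has not
   yet been split into a LO_SUM with the GP register.  */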
1264bool
1265some_small_symbolic_operand_int (rtx x)
1266{
1267  subrtx_var_iterator::array_type array;
1268  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1269    {
1270      rtx x = *iter;
1271      /* Don't re-split.  */
1272      if (GET_CODE (x) == LO_SUM)
1273	iter.skip_subrtxes ();
1274      else if (small_symbolic_operand (x, Pmode))
1275	return true;
1276    }
1277  return false;
1278}
1279
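/* Rewrite each small-data symbol in X as a GP-relative reference,
   i.e. (lo_sum pic_offset_table_rtx symbol); a copy of X is returned.  */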
1280rtx
1281split_small_symbolic_operand (rtx x)
1282{
1283  x = copy_insn (x);
1284  subrtx_ptr_iterator::array_type array;
1285  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1286    {
1287      rtx *ptr = *iter;
1288      rtx x = *ptr;
1289      /* Don't re-split.  */
1290      if (GET_CODE (x) == LO_SUM)
1291	iter.skip_subrtxes ();
1292      else if (small_symbolic_operand (x, Pmode))
1293	{
1294	  *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1295	  iter.skip_subrtxes ();
1296	}
1297    }
1298  return x;
1299}
1300
1301/* Indicate that INSN cannot be duplicated.  This is true for any insn
1302   that we've marked with gpdisp relocs, since those have to stay in
1303   1-1 correspondence with one another.
1304
1305   Technically we could copy them if we could set up a mapping from one
1306   sequence number to another, across the set of insns to be duplicated.
1307   This seems overly complicated and error-prone since interblock motion
1308   from sched-ebb could move one of the pair of insns to a different block.
1309
1310   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
1311   then they'll be in a different block from their ldgp.  Which could lead
1312   the bb reorder code to think that it would be ok to copy just the block
1313   containing the call and branch to the block containing the ldgp.  */
1314
1315static bool
1316alpha_cannot_copy_insn_p (rtx_insn *insn)
1317{
1318  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1319    return false;
1320  if (recog_memoized (insn) >= 0)
1321    return get_attr_cannot_copy (insn);
1322  else
1323    return false;
1324}
1325
1326
1327/* Try a machine-dependent way of reloading an illegitimate address
1328   operand.  If we find one, push the reload and return the new rtx.  */
1329
1330rtx
1331alpha_legitimize_reload_address (rtx x,
1332				 machine_mode mode ATTRIBUTE_UNUSED,
1333				 int opnum, int type,
1334				 int ind_levels ATTRIBUTE_UNUSED)
1335{
1336  /* We must recognize output that we have already generated ourselves.  */
1337  if (GET_CODE (x) == PLUS
1338      && GET_CODE (XEXP (x, 0)) == PLUS
1339      && REG_P (XEXP (XEXP (x, 0), 0))
1340      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1341      && CONST_INT_P (XEXP (x, 1)))
1342    {
1343      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1344		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1345		   opnum, (enum reload_type) type);
1346      return x;
1347    }
1348
  /* We wish to handle large displacements off a base register by
     splitting the addend across an ldah and the mem insn.  This cuts
     the number of extra insns needed from 3 to 1.  */
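  /* For example, (plus (reg) 0x12348) is rewritten as
     (plus (plus (reg) 0x10000) 0x2348): the inner PLUS is reloaded into
     a base register with a single ldah while the low part stays in the
     mem displacement.  */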
1352  if (GET_CODE (x) == PLUS
1353      && REG_P (XEXP (x, 0))
1354      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1355      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1356      && GET_CODE (XEXP (x, 1)) == CONST_INT)
1357    {
1358      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1359      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1360      HOST_WIDE_INT high
1361	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1362
1363      /* Check for 32-bit overflow.  */
1364      if (high + low != val)
1365	return NULL_RTX;
1366
1367      /* Reload the high part into a base reg; leave the low part
1368	 in the mem directly.  */
1369      x = gen_rtx_PLUS (GET_MODE (x),
1370			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1371				      GEN_INT (high)),
1372			GEN_INT (low));
1373
1374      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1375		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1376		   opnum, (enum reload_type) type);
1377      return x;
1378    }
1379
1380  return NULL_RTX;
1381}
1382
1383/* Compute a (partial) cost for rtx X.  Return true if the complete
1384   cost has been computed, and false if subexpressions should be
1385   scanned.  In either case, *TOTAL contains the cost result.  */
1386
1387static bool
1388alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1389		 bool speed)
1390{
1391  machine_mode mode = GET_MODE (x);
1392  bool float_mode_p = FLOAT_MODE_P (mode);
1393  const struct alpha_rtx_cost_data *cost_data;
1394
1395  if (!speed)
1396    cost_data = &alpha_rtx_cost_size;
1397  else
1398    cost_data = &alpha_rtx_cost_data[alpha_tune];
1399
1400  switch (code)
1401    {
1402    case CONST_INT:
1403      /* If this is an 8-bit constant, return zero since it can be used
1404	 nearly anywhere with no cost.  If it is a valid operand for an
1405	 ADD or AND, likewise return 0 if we know it will be used in that
1406	 context.  Otherwise, return 2 since it might be used there later.
1407	 All other constants take at least two insns.  */
1408      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1409	{
1410	  *total = 0;
1411	  return true;
1412	}
1413      /* FALLTHRU */
1414
1415    case CONST_DOUBLE:
1416      if (x == CONST0_RTX (mode))
1417	*total = 0;
1418      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1419	       || (outer_code == AND && and_operand (x, VOIDmode)))
1420	*total = 0;
1421      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1422	*total = 2;
1423      else
1424	*total = COSTS_N_INSNS (2);
1425      return true;
1426
1427    case CONST:
1428    case SYMBOL_REF:
1429    case LABEL_REF:
1430      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1431	*total = COSTS_N_INSNS (outer_code != MEM);
1432      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1433	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
1434      else if (tls_symbolic_operand_type (x))
1435	/* Estimate of cost for call_pal rduniq.  */
1436	/* ??? How many insns do we emit here?  More than one...  */
1437	*total = COSTS_N_INSNS (15);
1438      else
1439	/* Otherwise we do a load from the GOT.  */
1440	*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1441      return true;
1442
1443    case HIGH:
1444      /* This is effectively an add_operand.  */
1445      *total = 2;
1446      return true;
1447
1448    case PLUS:
1449    case MINUS:
1450      if (float_mode_p)
1451	*total = cost_data->fp_add;
1452      else if (GET_CODE (XEXP (x, 0)) == MULT
1453	       && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1454	{
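	  /* A MULT by 4 or 8 in this position folds into a scaled add
	     (s4add/s8add), so charge one insn plus the operand costs.  */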
1455	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1456			      (enum rtx_code) outer_code, opno, speed)
1457		    + rtx_cost (XEXP (x, 1),
1458				(enum rtx_code) outer_code, opno, speed)
1459		    + COSTS_N_INSNS (1));
1460	  return true;
1461	}
1462      return false;
1463
1464    case MULT:
1465      if (float_mode_p)
1466	*total = cost_data->fp_mult;
1467      else if (mode == DImode)
1468	*total = cost_data->int_mult_di;
1469      else
1470	*total = cost_data->int_mult_si;
1471      return false;
1472
1473    case ASHIFT:
1474      if (CONST_INT_P (XEXP (x, 1))
1475	  && INTVAL (XEXP (x, 1)) <= 3)
1476	{
1477	  *total = COSTS_N_INSNS (1);
1478	  return false;
1479	}
1480      /* FALLTHRU */
1481
1482    case ASHIFTRT:
1483    case LSHIFTRT:
1484      *total = cost_data->int_shift;
1485      return false;
1486
1487    case IF_THEN_ELSE:
1488      if (float_mode_p)
1489        *total = cost_data->fp_add;
1490      else
1491        *total = cost_data->int_cmov;
1492      return false;
1493
1494    case DIV:
1495    case UDIV:
1496    case MOD:
1497    case UMOD:
1498      if (!float_mode_p)
1499	*total = cost_data->int_div;
1500      else if (mode == SFmode)
1501        *total = cost_data->fp_div_sf;
1502      else
1503        *total = cost_data->fp_div_df;
1504      return false;
1505
1506    case MEM:
1507      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1508      return true;
1509
1510    case NEG:
1511      if (! float_mode_p)
1512	{
1513	  *total = COSTS_N_INSNS (1);
1514	  return false;
1515	}
1516      /* FALLTHRU */
1517
1518    case ABS:
1519      if (! float_mode_p)
1520	{
1521	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1522	  return false;
1523	}
1524      /* FALLTHRU */
1525
1526    case FLOAT:
1527    case UNSIGNED_FLOAT:
1528    case FIX:
1529    case UNSIGNED_FIX:
1530    case FLOAT_TRUNCATE:
1531      *total = cost_data->fp_add;
1532      return false;
1533
1534    case FLOAT_EXTEND:
1535      if (MEM_P (XEXP (x, 0)))
1536	*total = 0;
1537      else
1538	*total = cost_data->fp_add;
1539      return false;
1540
1541    default:
1542      return false;
1543    }
1544}
1545
/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  */
1550
1551void
1552get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1553{
1554  rtx base;
1555  HOST_WIDE_INT disp, offset;
1556
1557  gcc_assert (MEM_P (ref));
1558
1559  if (reload_in_progress)
1560    {
1561      base = find_replacement (&XEXP (ref, 0));
1562      gcc_assert (memory_address_p (GET_MODE (ref), base));
1563    }
1564  else
1565    base = XEXP (ref, 0);
1566
1567  if (GET_CODE (base) == PLUS)
1568    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1569  else
1570    disp = 0;
1571
1572  /* Find the byte offset within an aligned word.  If the memory itself is
1573     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
1574     will have examined the base register and determined it is aligned, and
1575     thus displacements from it are naturally alignable.  */
1576  if (MEM_ALIGN (ref) >= 32)
1577    offset = 0;
1578  else
1579    offset = disp & 3;
1580
1581  /* The location should not cross aligned word boundary.  */
1582  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1583	      <= GET_MODE_SIZE (SImode));
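
  /* For example, an HImode reference at displacement 6 from an aligned
     base gives OFFSET == 2: *PALIGNED_MEM then covers bytes 4..7 and
     *PBITNUM is 16.  */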
1584
1585  /* Access the entire aligned word.  */
1586  *paligned_mem = widen_memory_access (ref, SImode, -offset);
1587
1588  /* Convert the byte offset within the word to a bit offset.  */
1589  offset *= BITS_PER_UNIT;
1590  *pbitnum = GEN_INT (offset);
1591}
1592
/* Similar to get_aligned_mem, but just return the address.  Handle
   the two reload cases.  */
1595
1596rtx
1597get_unaligned_address (rtx ref)
1598{
1599  rtx base;
1600  HOST_WIDE_INT offset = 0;
1601
1602  gcc_assert (MEM_P (ref));
1603
1604  if (reload_in_progress)
1605    {
1606      base = find_replacement (&XEXP (ref, 0));
1607      gcc_assert (memory_address_p (GET_MODE (ref), base));
1608    }
1609  else
1610    base = XEXP (ref, 0);
1611
1612  if (GET_CODE (base) == PLUS)
1613    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1614
1615  return plus_constant (Pmode, base, offset);
1616}
1617
1618/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1619   X is always returned in a register.  */
1620
1621rtx
1622get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1623{
1624  if (GET_CODE (addr) == PLUS)
1625    {
1626      ofs += INTVAL (XEXP (addr, 1));
1627      addr = XEXP (addr, 0);
1628    }
1629
1630  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1631			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1632}
1633
1634/* On the Alpha, all (non-symbolic) constants except zero go into
1635   a floating-point register via memory.  Note that we cannot
1636   return anything that is not a subset of RCLASS, and that some
1637   symbolic constants cannot be dropped to memory.  */
1638
1639enum reg_class
alpha_preferred_reload_class (rtx x, enum reg_class rclass)
1641{
1642  /* Zero is present in any register class.  */
1643  if (x == CONST0_RTX (GET_MODE (x)))
1644    return rclass;
1645
1646  /* These sorts of constants we can easily drop to memory.  */
1647  if (CONST_INT_P (x)
1648      || GET_CODE (x) == CONST_DOUBLE
1649      || GET_CODE (x) == CONST_VECTOR)
1650    {
1651      if (rclass == FLOAT_REGS)
1652	return NO_REGS;
1653      if (rclass == ALL_REGS)
1654	return GENERAL_REGS;
1655      return rclass;
1656    }
1657
1658  /* All other kinds of constants should not (and in the case of HIGH
1659     cannot) be dropped to memory -- instead we use a GENERAL_REGS
1660     secondary reload.  */
1661  if (CONSTANT_P (x))
1662    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1663
1664  return rclass;
1665}
1666
1667/* Inform reload about cases where moving X with a mode MODE to a register in
1668   RCLASS requires an extra scratch or immediate register.  Return the class
1669   needed for the immediate register.  */
1670
1671static reg_class_t
1672alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1673			machine_mode mode, secondary_reload_info *sri)
1674{
1675  enum reg_class rclass = (enum reg_class) rclass_i;
1676
1677  /* Loading and storing HImode or QImode values to and from memory
1678     usually requires a scratch register.  */
1679  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1680    {
1681      if (any_memory_operand (x, mode))
1682	{
1683	  if (in_p)
1684	    {
1685	      if (!aligned_memory_operand (x, mode))
1686		sri->icode = direct_optab_handler (reload_in_optab, mode);
1687	    }
1688	  else
1689	    sri->icode = direct_optab_handler (reload_out_optab, mode);
1690	  return NO_REGS;
1691	}
1692    }
1693
1694  /* We also cannot do integral arithmetic into FP regs, as might result
1695     from register elimination into a DImode fp register.  */
1696  if (rclass == FLOAT_REGS)
1697    {
1698      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1699	return GENERAL_REGS;
1700      if (in_p && INTEGRAL_MODE_P (mode)
1701	  && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1702	return GENERAL_REGS;
1703    }
1704
1705  return NO_REGS;
1706}
1707
/* Given SEQ, which is an INSN list, look for any MEMs in either
   a SET_DEST or a SET_SRC and copy the volatile, no-trap, and
   read-only flags from REF into each of the MEMs found.  If REF
   is not a MEM, don't do anything.  */
1712
1713void
1714alpha_set_memflags (rtx seq, rtx ref)
1715{
1716  rtx_insn *insn;
1717
1718  if (!MEM_P (ref))
1719    return;
1720
1721  /* This is only called from alpha.md, after having had something
1722     generated from one of the insn patterns.  So if everything is
1723     zero, the pattern is already up-to-date.  */
1724  if (!MEM_VOLATILE_P (ref)
1725      && !MEM_NOTRAP_P (ref)
1726      && !MEM_READONLY_P (ref))
1727    return;
1728
1729  subrtx_var_iterator::array_type array;
1730  for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1731    if (INSN_P (insn))
1732      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1733	{
1734	  rtx x = *iter;
1735	  if (MEM_P (x))
1736	    {
1737	      MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1738	      MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1739	      MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1740	      /* Sadly, we cannot use alias sets because the extra
1741		 aliasing produced by the AND interferes.  Given that
1742		 two-byte quantities are the only thing we would be
1743		 able to differentiate anyway, there does not seem to
1744		 be any point in convoluting the early out of the
1745		 alias check.  */
1746	      iter.skip_subrtxes ();
1747	    }
1748	}
1749    else
1750      gcc_unreachable ();
1751}
1752
1753static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1754				 int, bool);
1755
/* Internal routine for alpha_emit_set_const to check whether the
   constant can be built in N or fewer insns.  If NO_OUTPUT is true, we
   only check that N insns are possible and return pc_rtx if successful.  */
1759
1760static rtx
1761alpha_emit_set_const_1 (rtx target, machine_mode mode,
1762			HOST_WIDE_INT c, int n, bool no_output)
1763{
1764  HOST_WIDE_INT new_const;
1765  int i, bits;
1766  /* Use a pseudo if highly optimizing and still generating RTL.  */
1767  rtx subtarget
1768    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1769  rtx temp, insn;
1770
1771  /* If this is a sign-extended 32-bit constant, we can do this in at most
1772     three insns, so do it if we have enough insns left.  We always have
1773     a sign-extended 32-bit constant when compiling on a narrow machine.  */
1774
1775  if (HOST_BITS_PER_WIDE_INT != 64
1776      || c >> 31 == -1 || c >> 31 == 0)
1777    {
1778      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1779      HOST_WIDE_INT tmp1 = c - low;
1780      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1781      HOST_WIDE_INT extra = 0;
1782
1783      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldah insns.  */
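      /* For example, c = 0x7fff8000 initially gives HIGH = -0x8000;
	 with EXTRA = 0x4000 the value is instead built as two ldah's
	 of 0x4000 followed by an lda of -0x8000.  */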
1785
1786      if ((high & 0x8000) != 0 && c >= 0)
1787	{
1788	  extra = 0x4000;
1789	  tmp1 -= 0x40000000;
1790	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1791	}
1792
1793      if (c == low || (low == 0 && extra == 0))
1794	{
1795	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1796	     but that meant that we can't handle INT_MIN on 32-bit machines
1797	     (like NT/Alpha), because we recurse indefinitely through
1798	     emit_move_insn to gen_movdi.  So instead, since we know exactly
1799	     what we want, create it explicitly.  */
1800
1801	  if (no_output)
1802	    return pc_rtx;
1803	  if (target == NULL)
1804	    target = gen_reg_rtx (mode);
1805	  emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1806	  return target;
1807	}
1808      else if (n >= 2 + (extra != 0))
1809	{
1810	  if (no_output)
1811	    return pc_rtx;
1812	  if (!can_create_pseudo_p ())
1813	    {
1814	      emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1815	      temp = target;
1816	    }
1817	  else
1818	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
1819					  subtarget, mode);
1820
1821	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
1822	     This means that if we go through expand_binop, we'll try to
1823	     generate extensions, etc, which will require new pseudos, which
1824	     will fail during some split phases.  The SImode add patterns
1825	     still exist, but are not named.  So build the insns by hand.  */
1826
1827	  if (extra != 0)
1828	    {
1829	      if (! subtarget)
1830		subtarget = gen_reg_rtx (mode);
1831	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1832	      insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1833	      emit_insn (insn);
1834	      temp = subtarget;
1835	    }
1836
1837	  if (target == NULL)
1838	    target = gen_reg_rtx (mode);
1839	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1840	  insn = gen_rtx_SET (VOIDmode, target, insn);
1841	  emit_insn (insn);
1842	  return target;
1843	}
1844    }
1845
1846  /* If we couldn't do it that way, try some other methods.  But if we have
1847     no instructions left, don't bother.  Likewise, if this is SImode and
1848     we can't make pseudos, we can't do anything since the expand_binop
1849     and expand_unop calls will widen and try to make pseudos.  */
1850
1851  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1852    return 0;
1853
1854  /* Next, see if we can load a related constant and then shift and possibly
     negate it to get the constant we want.  Try this once with each
     increasing number of insns.  */
1857
1858  for (i = 1; i < n; i++)
1859    {
      /* First, see if, after subtracting some low bits, we have an easy
	 load of the high bits.  */
1862
1863      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1864      if (new_const != 0)
1865	{
1866          temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1867	  if (temp)
1868	    {
1869	      if (no_output)
1870		return temp;
1871	      return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1872				   target, 0, OPTAB_WIDEN);
1873	    }
1874	}
1875
1876      /* Next try complementing.  */
1877      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1878      if (temp)
1879	{
1880	  if (no_output)
1881	    return temp;
1882	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1883	}
1884
1885      /* Next try to form a constant and do a left shift.  We can do this
1886	 if some low-order bits are zero; the exact_log2 call below tells
1887	 us that information.  The bits we are shifting out could be any
1888	 value, but here we'll just try the 0- and sign-extended forms of
1889	 the constant.  To try to increase the chance of having the same
1890	 constant in more than one insn, start at the highest number of
1891	 bits to shift, but try all possibilities in case a ZAPNOT will
1892	 be useful.  */
1893
1894      bits = exact_log2 (c & -c);
1895      if (bits > 0)
1896	for (; bits > 0; bits--)
1897	  {
1898	    new_const = c >> bits;
1899	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1900	    if (!temp && c < 0)
1901	      {
1902		new_const = (unsigned HOST_WIDE_INT)c >> bits;
1903		temp = alpha_emit_set_const (subtarget, mode, new_const,
1904					     i, no_output);
1905	      }
1906	    if (temp)
1907	      {
1908		if (no_output)
1909		  return temp;
1910	        return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1911				     target, 0, OPTAB_WIDEN);
1912	      }
1913	  }
1914
1915      /* Now try high-order zero bits.  Here we try the shifted-in bits as
1916	 all zero and all ones.  Be careful to avoid shifting outside the
1917	 mode and to avoid shifting outside the host wide int size.  */
1918      /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1919	 confuse the recursive call and set all of the high 32 bits.  */
1920
1921      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1922	      - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1923      if (bits > 0)
1924	for (; bits > 0; bits--)
1925	  {
1926	    new_const = c << bits;
1927	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1928	    if (!temp)
1929	      {
1930		new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1931	        temp = alpha_emit_set_const (subtarget, mode, new_const,
1932					     i, no_output);
1933	      }
1934	    if (temp)
1935	      {
1936		if (no_output)
1937		  return temp;
1938		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1939				     target, 1, OPTAB_WIDEN);
1940	      }
1941	  }
1942
1943      /* Now try high-order 1 bits.  We get that with a sign-extension.
1944	 But one bit isn't enough here.  Be careful to avoid shifting outside
1945	 the mode and to avoid shifting outside the host wide int size.  */
1946
1947      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1948	      - floor_log2 (~ c) - 2);
1949      if (bits > 0)
1950	for (; bits > 0; bits--)
1951	  {
1952	    new_const = c << bits;
1953	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1954	    if (!temp)
1955	      {
1956		new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1957	        temp = alpha_emit_set_const (subtarget, mode, new_const,
1958					     i, no_output);
1959	      }
1960	    if (temp)
1961	      {
1962		if (no_output)
1963		  return temp;
1964		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1965				     target, 0, OPTAB_WIDEN);
1966	      }
1967	  }
1968    }
1969
1970#if HOST_BITS_PER_WIDE_INT == 64
  /* Finally, see if we can load a value into the target that is the same
     as the constant except that all bytes that are 0 are changed to be
     0xff.  If we can, then we can do a ZAPNOT to obtain the desired
     constant.  */
1974
1975  new_const = c;
1976  for (i = 0; i < 64; i += 8)
1977    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1978      new_const |= (HOST_WIDE_INT) 0xff << i;
1979
1980  /* We are only called for SImode and DImode.  If this is SImode, ensure that
1981     we are sign extended to a full word.  */
1982
1983  if (mode == SImode)
1984    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1985
1986  if (new_const != c)
1987    {
1988      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1989      if (temp)
1990	{
1991	  if (no_output)
1992	    return temp;
1993	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1994			       target, 0, OPTAB_WIDEN);
1995	}
1996    }
1997#endif
1998
1999  return 0;
2000}
2001
2002/* Try to output insns to set TARGET equal to the constant C if it can be
2003   done in less than N insns.  Do all computations in MODE.  Returns the place
2004   where the output has been placed if it can be done and the insns have been
2005   emitted.  If it would take more than N insns, zero is returned and no
   insns are emitted.  */
2007
2008static rtx
2009alpha_emit_set_const (rtx target, machine_mode mode,
2010		      HOST_WIDE_INT c, int n, bool no_output)
2011{
2012  machine_mode orig_mode = mode;
2013  rtx orig_target = target;
2014  rtx result = 0;
2015  int i;
2016
  /* If we can't make any pseudos, TARGET is an SImode hard register, and we
     can't load this constant in one insn, do this in DImode.  */
2019  if (!can_create_pseudo_p () && mode == SImode
2020      && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2021    {
2022      result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2023      if (result)
2024	return result;
2025
2026      target = no_output ? NULL : gen_lowpart (DImode, target);
2027      mode = DImode;
2028    }
2029  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2030    {
2031      target = no_output ? NULL : gen_lowpart (DImode, target);
2032      mode = DImode;
2033    }
2034
2035  /* Try 1 insn, then 2, then up to N.  */
2036  for (i = 1; i <= n; i++)
2037    {
2038      result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2039      if (result)
2040	{
2041	  rtx_insn *insn;
2042	  rtx set;
2043
2044	  if (no_output)
2045	    return result;
2046
2047	  insn = get_last_insn ();
2048	  set = single_set (insn);
2049	  if (! CONSTANT_P (SET_SRC (set)))
2050	    set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2051	  break;
2052	}
2053    }
2054
2055  /* Allow for the case where we changed the mode of TARGET.  */
2056  if (result)
2057    {
2058      if (result == target)
2059	result = orig_target;
2060      else if (mode != orig_mode)
2061	result = gen_lowpart (orig_mode, result);
2062    }
2063
2064  return result;
2065}
2066
2067/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
   fall back to a straightforward decomposition.  We do this to avoid
2069   exponential run times encountered when looking for longer sequences
2070   with alpha_emit_set_const.  */
2071
2072static rtx
2073alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
2074{
2075  HOST_WIDE_INT d1, d2, d3, d4;
2076
  /* Decompose the entire word.  */
2078#if HOST_BITS_PER_WIDE_INT >= 64
2079  gcc_assert (c2 == -(c1 < 0));
2080  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2081  c1 -= d1;
2082  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2083  c1 = (c1 - d2) >> 32;
2084  d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2085  c1 -= d3;
2086  d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2087  gcc_assert (c1 == d4);
2088#else
2089  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2090  c1 -= d1;
2091  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2092  gcc_assert (c1 == d2);
2093  c2 += (d2 < 0);
2094  d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
2095  c2 -= d3;
2096  d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2097  gcc_assert (c2 == d4);
2098#endif
2099
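  /* The decomposition above guarantees that the original constant equals
     ((D4 + D3) << 32) + D2 + D1, where D1 and D3 are signed 16-bit values
     and D2 and D4 are signed 32-bit values with zero low halves; the code
     below simply reassembles it in that form.  */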
  /* Construct the high word.  */
2101  if (d4)
2102    {
2103      emit_move_insn (target, GEN_INT (d4));
2104      if (d3)
2105	emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2106    }
2107  else
2108    emit_move_insn (target, GEN_INT (d3));
2109
  /* Shift it into place.  */
2111  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2112
2113  /* Add in the low bits.  */
2114  if (d2)
2115    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2116  if (d1)
2117    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2118
2119  return target;
2120}
2121
2122/* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
2123   the low 64 bits.  */
2124
2125static void
2126alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2127{
2128  HOST_WIDE_INT i0, i1;
2129
2130  if (GET_CODE (x) == CONST_VECTOR)
    x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2133
2134  if (CONST_INT_P (x))
2135    {
2136      i0 = INTVAL (x);
2137      i1 = -(i0 < 0);
2138    }
2139  else if (HOST_BITS_PER_WIDE_INT >= 64)
2140    {
2141      i0 = CONST_DOUBLE_LOW (x);
2142      i1 = -(i0 < 0);
2143    }
2144  else
2145    {
2146      i0 = CONST_DOUBLE_LOW (x);
2147      i1 = CONST_DOUBLE_HIGH (x);
2148    }
2149
2150  *p0 = i0;
2151  *p1 = i1;
2152}
2153
2154/* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for which
2155   we are willing to load the value into a register via a move pattern.
2156   Normally this is all symbolic constants, integral constants that
2157   take three or fewer instructions, and floating-point zero.  */
2158
2159bool
2160alpha_legitimate_constant_p (machine_mode mode, rtx x)
2161{
2162  HOST_WIDE_INT i0, i1;
2163
2164  switch (GET_CODE (x))
2165    {
2166    case LABEL_REF:
2167    case HIGH:
2168      return true;
2169
2170    case CONST:
2171      if (GET_CODE (XEXP (x, 0)) == PLUS
2172	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2173	x = XEXP (XEXP (x, 0), 0);
2174      else
2175	return true;
2176
2177      if (GET_CODE (x) != SYMBOL_REF)
2178	return true;
2179
2180      /* FALLTHRU */
2181
2182    case SYMBOL_REF:
2183      /* TLS symbols are never valid.  */
2184      return SYMBOL_REF_TLS_MODEL (x) == 0;
2185
2186    case CONST_DOUBLE:
2187      if (x == CONST0_RTX (mode))
2188	return true;
2189      if (FLOAT_MODE_P (mode))
2190	return false;
2191      goto do_integer;
2192
2193    case CONST_VECTOR:
2194      if (x == CONST0_RTX (mode))
2195	return true;
2196      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2197	return false;
2198      if (GET_MODE_SIZE (mode) != 8)
2199	return false;
2200      goto do_integer;
2201
2202    case CONST_INT:
2203    do_integer:
2204      if (TARGET_BUILD_CONSTANTS)
2205	return true;
2206      alpha_extract_integer (x, &i0, &i1);
      if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2208        return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2209      return false;
2210
2211    default:
2212      return false;
2213    }
2214}
2215
2216/* Operand 1 is known to be a constant, and should require more than one
2217   instruction to load.  Emit that multi-part load.  */
2218
2219bool
2220alpha_split_const_mov (machine_mode mode, rtx *operands)
2221{
2222  HOST_WIDE_INT i0, i1;
2223  rtx temp = NULL_RTX;
2224
2225  alpha_extract_integer (operands[1], &i0, &i1);
2226
2227  if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2228    temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2229
2230  if (!temp && TARGET_BUILD_CONSTANTS)
2231    temp = alpha_emit_set_long_const (operands[0], i0, i1);
2232
2233  if (temp)
2234    {
2235      if (!rtx_equal_p (operands[0], temp))
2236	emit_move_insn (operands[0], temp);
2237      return true;
2238    }
2239
2240  return false;
2241}
2242
2243/* Expand a move instruction; return true if all work is done.
2244   We don't handle non-bwx subword loads here.  */
2245
2246bool
2247alpha_expand_mov (machine_mode mode, rtx *operands)
2248{
2249  rtx tmp;
2250
2251  /* If the output is not a register, the input must be.  */
2252  if (MEM_P (operands[0])
2253      && ! reg_or_0_operand (operands[1], mode))
2254    operands[1] = force_reg (mode, operands[1]);
2255
2256  /* Allow legitimize_address to perform some simplifications.  */
2257  if (mode == Pmode && symbolic_operand (operands[1], mode))
2258    {
2259      tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2260      if (tmp)
2261	{
2262	  if (tmp == operands[0])
2263	    return true;
2264	  operands[1] = tmp;
2265	  return false;
2266	}
2267    }
2268
2269  /* Early out for non-constants and valid constants.  */
2270  if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2271    return false;
2272
2273  /* Split large integers.  */
2274  if (CONST_INT_P (operands[1])
2275      || GET_CODE (operands[1]) == CONST_DOUBLE
2276      || GET_CODE (operands[1]) == CONST_VECTOR)
2277    {
2278      if (alpha_split_const_mov (mode, operands))
2279	return true;
2280    }
2281
2282  /* Otherwise we've nothing left but to drop the thing to memory.  */
2283  tmp = force_const_mem (mode, operands[1]);
2284
2285  if (tmp == NULL_RTX)
2286    return false;
2287
2288  if (reload_in_progress)
2289    {
2290      emit_move_insn (operands[0], XEXP (tmp, 0));
2291      operands[1] = replace_equiv_address (tmp, operands[0]);
2292    }
2293  else
2294    operands[1] = validize_mem (tmp);
2295  return false;
2296}
2297
2298/* Expand a non-bwx QImode or HImode move instruction;
2299   return true if all work is done.  */
2300
2301bool
2302alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2303{
2304  rtx seq;
2305
2306  /* If the output is not a register, the input must be.  */
2307  if (MEM_P (operands[0]))
2308    operands[1] = force_reg (mode, operands[1]);
2309
2310  /* Handle four memory cases, unaligned and aligned for either the input
2311     or the output.  The only case where we can be called during reload is
2312     for aligned loads; all other cases require temporaries.  */
2313
2314  if (any_memory_operand (operands[1], mode))
2315    {
2316      if (aligned_memory_operand (operands[1], mode))
2317	{
2318	  if (reload_in_progress)
2319	    {
2320	      if (mode == QImode)
2321		seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2322	      else
2323		seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2324	      emit_insn (seq);
2325	    }
2326	  else
2327	    {
2328	      rtx aligned_mem, bitnum;
2329	      rtx scratch = gen_reg_rtx (SImode);
2330	      rtx subtarget;
2331	      bool copyout;
2332
2333	      get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2334
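	      /* If the destination is a register, generate the aligned
		 load directly into its DImode low part; otherwise load
		 into a fresh DImode temporary and copy the narrow result
		 out afterwards.  */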
2335	      subtarget = operands[0];
2336	      if (REG_P (subtarget))
2337		subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2338	      else
2339		subtarget = gen_reg_rtx (DImode), copyout = true;
2340
2341	      if (mode == QImode)
2342		seq = gen_aligned_loadqi (subtarget, aligned_mem,
2343					  bitnum, scratch);
2344	      else
2345		seq = gen_aligned_loadhi (subtarget, aligned_mem,
2346					  bitnum, scratch);
2347	      emit_insn (seq);
2348
2349	      if (copyout)
2350		emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2351	    }
2352	}
2353      else
2354	{
2355	  /* Don't pass these as parameters since that makes the generated
2356	     code depend on parameter evaluation order which will cause
2357	     bootstrap failures.  */
2358
2359	  rtx temp1, temp2, subtarget, ua;
2360	  bool copyout;
2361
2362	  temp1 = gen_reg_rtx (DImode);
2363	  temp2 = gen_reg_rtx (DImode);
2364
2365	  subtarget = operands[0];
2366	  if (REG_P (subtarget))
2367	    subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2368	  else
2369	    subtarget = gen_reg_rtx (DImode), copyout = true;
2370
2371	  ua = get_unaligned_address (operands[1]);
2372	  if (mode == QImode)
2373	    seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2374	  else
2375	    seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2376
2377	  alpha_set_memflags (seq, operands[1]);
2378	  emit_insn (seq);
2379
2380	  if (copyout)
2381	    emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2382	}
2383      return true;
2384    }
2385
2386  if (any_memory_operand (operands[0], mode))
2387    {
2388      if (aligned_memory_operand (operands[0], mode))
2389	{
2390	  rtx aligned_mem, bitnum;
2391	  rtx temp1 = gen_reg_rtx (SImode);
2392	  rtx temp2 = gen_reg_rtx (SImode);
2393
2394	  get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2395
2396	  emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2397					temp1, temp2));
2398	}
2399      else
2400	{
2401	  rtx temp1 = gen_reg_rtx (DImode);
2402	  rtx temp2 = gen_reg_rtx (DImode);
2403	  rtx temp3 = gen_reg_rtx (DImode);
2404	  rtx ua = get_unaligned_address (operands[0]);
2405
2406	  if (mode == QImode)
2407	    seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2408	  else
2409	    seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2410
2411	  alpha_set_memflags (seq, operands[0]);
2412	  emit_insn (seq);
2413	}
2414      return true;
2415    }
2416
2417  return false;
2418}
2419
2420/* Implement the movmisalign patterns.  One of the operands is a memory
   that is not naturally aligned.  Emit instructions to load or store it.  */
2422
2423void
2424alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2425{
  /* Honor misaligned loads; these are the cases we promised to handle.  */
2427  if (MEM_P (operands[1]))
2428    {
2429      rtx tmp;
2430
2431      if (register_operand (operands[0], mode))
2432	tmp = operands[0];
2433      else
2434	tmp = gen_reg_rtx (mode);
2435
2436      alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2437      if (tmp != operands[0])
2438	emit_move_insn (operands[0], tmp);
2439    }
2440  else if (MEM_P (operands[0]))
2441    {
2442      if (!reg_or_0_operand (operands[1], mode))
2443	operands[1] = force_reg (mode, operands[1]);
2444      alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2445    }
2446  else
2447    gcc_unreachable ();
2448}
2449
2450/* Generate an unsigned DImode to FP conversion.  This is the same code
2451   optabs would emit if we didn't have TFmode patterns.
2452
2453   For SFmode, this is the only construction I've found that can pass
2454   gcc.c-torture/execute/ieee/rbug.c.  No scenario that uses DFmode
2455   intermediates will work, because you'll get intermediate rounding
2456   that ruins the end result.  Some of this could be fixed by turning
2457   on round-to-positive-infinity, but that requires diddling the fpsr,
2458   which kills performance.  I tried turning this around and converting
2459   to a negative number, so that I could turn on /m, but either I did
   it wrong or there's something else, because I wound up with the exact
2461   same single-bit error.  There is a branch-less form of this same code:
2462
2463	srl     $16,1,$1
2464	and     $16,1,$2
2465	cmplt   $16,0,$3
2466	or      $1,$2,$2
2467	cmovge  $16,$16,$2
2468	itoft	$3,$f10
2469	itoft	$2,$f11
2470	cvtqs   $f11,$f11
2471	adds    $f11,$f11,$f0
2472	fcmoveq $f10,$f11,$f0
2473
2474   I'm not using it because it's the same number of instructions as
2475   this branch-full form, and it has more serialized long latency
2476   instructions on the critical path.
2477
2478   For DFmode, we can avoid rounding errors by breaking up the word
2479   into two pieces, converting them separately, and adding them back:
2480
2481   LC0: .long 0,0x5f800000
2482
2483	itoft	$16,$f11
2484	lda	$2,LC0
2485	cmplt	$16,0,$1
2486	cpyse	$f11,$f31,$f10
2487	cpyse	$f31,$f11,$f11
2488	s4addq	$1,$2,$1
2489	lds	$f12,0($1)
2490	cvtqt	$f10,$f10
2491	cvtqt	$f11,$f11
2492	addt	$f12,$f10,$f0
2493	addt	$f0,$f11,$f0
2494
2495   This doesn't seem to be a clear-cut win over the optabs form.
2496   It probably all depends on the distribution of numbers being
2497   converted -- in the optabs form, all but high-bit-set has a
2498   much lower minimum execution time.  */
2499
2500void
2501alpha_emit_floatuns (rtx operands[2])
2502{
2503  rtx neglab, donelab, i0, i1, f0, in, out;
2504  machine_mode mode;
2505
2506  out = operands[0];
2507  in = force_reg (DImode, operands[1]);
2508  mode = GET_MODE (out);
2509  neglab = gen_label_rtx ();
2510  donelab = gen_label_rtx ();
2511  i0 = gen_reg_rtx (DImode);
2512  i1 = gen_reg_rtx (DImode);
2513  f0 = gen_reg_rtx (mode);
2514
2515  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2516
2517  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2518  emit_jump_insn (gen_jump (donelab));
2519  emit_barrier ();
2520
2521  emit_label (neglab);
2522
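  /* IN has its high bit set: logically shift it right by one, folding the
     discarded low bit back in so that rounding is unaffected, convert,
     and then double the result.  */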
2523  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2524  emit_insn (gen_anddi3 (i1, in, const1_rtx));
2525  emit_insn (gen_iordi3 (i0, i0, i1));
2526  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2527  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2528
2529  emit_label (donelab);
2530}
2531
2532/* Generate the comparison for a conditional branch.  */
2533
2534void
2535alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2536{
2537  enum rtx_code cmp_code, branch_code;
2538  machine_mode branch_mode = VOIDmode;
2539  enum rtx_code code = GET_CODE (operands[0]);
2540  rtx op0 = operands[1], op1 = operands[2];
2541  rtx tem;
2542
2543  if (cmp_mode == TFmode)
2544    {
2545      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2546      op1 = const0_rtx;
2547      cmp_mode = DImode;
2548    }
2549
2550  /* The general case: fold the comparison code to the types of compares
2551     that we have, choosing the branch as necessary.  */
2552  switch (code)
2553    {
2554    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2555    case UNORDERED:
2556      /* We have these compares.  */
2557      cmp_code = code, branch_code = NE;
2558      break;
2559
2560    case NE:
2561    case ORDERED:
2562      /* These must be reversed.  */
2563      cmp_code = reverse_condition (code), branch_code = EQ;
2564      break;
2565
2566    case GE:  case GT: case GEU:  case GTU:
2567      /* For FP, we swap them, for INT, we reverse them.  */
2568      if (cmp_mode == DFmode)
2569	{
2570	  cmp_code = swap_condition (code);
2571	  branch_code = NE;
2572	  std::swap (op0, op1);
2573	}
2574      else
2575	{
2576	  cmp_code = reverse_condition (code);
2577	  branch_code = EQ;
2578	}
2579      break;
2580
2581    default:
2582      gcc_unreachable ();
2583    }
2584
2585  if (cmp_mode == DFmode)
2586    {
2587      if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2588	{
2589	  /* When we are not as concerned about non-finite values, and we
2590	     are comparing against zero, we can branch directly.  */
2591	  if (op1 == CONST0_RTX (DFmode))
2592	    cmp_code = UNKNOWN, branch_code = code;
2593	  else if (op0 == CONST0_RTX (DFmode))
2594	    {
2595	      /* Undo the swap we probably did just above.  */
2596	      std::swap (op0, op1);
2597	      branch_code = swap_condition (cmp_code);
2598	      cmp_code = UNKNOWN;
2599	    }
2600	}
2601      else
2602	{
2603	  /* ??? We mark the branch mode to be CCmode to prevent the
2604	     compare and branch from being combined, since the compare
2605	     insn follows IEEE rules that the branch does not.  */
2606	  branch_mode = CCmode;
2607	}
2608    }
2609  else
2610    {
2611      /* The following optimizations are only for signed compares.  */
2612      if (code != LEU && code != LTU && code != GEU && code != GTU)
2613	{
2614	  /* Whee.  Compare and branch against 0 directly.  */
2615	  if (op1 == const0_rtx)
2616	    cmp_code = UNKNOWN, branch_code = code;
2617
	  /* If the constant doesn't fit into an immediate, but can
	     be generated by lda/ldah, we adjust the argument and
	     compare against zero, so we can use beq/bne directly.  */
2621	  /* ??? Don't do this when comparing against symbols, otherwise
2622	     we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2623	     be declared false out of hand (at least for non-weak).  */
2624	  else if (CONST_INT_P (op1)
2625		   && (code == EQ || code == NE)
2626		   && !(symbolic_operand (op0, VOIDmode)
2627			|| (REG_P (op0) && REG_POINTER (op0))))
2628	    {
2629	      rtx n_op1 = GEN_INT (-INTVAL (op1));
2630
2631	      if (! satisfies_constraint_I (op1)
2632		  && (satisfies_constraint_K (n_op1)
2633		      || satisfies_constraint_L (n_op1)))
2634		cmp_code = PLUS, branch_code = code, op1 = n_op1;
2635	    }
2636	}
2637
2638      if (!reg_or_0_operand (op0, DImode))
2639	op0 = force_reg (DImode, op0);
2640      if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2641	op1 = force_reg (DImode, op1);
2642    }
2643
2644  /* Emit an initial compare instruction, if necessary.  */
2645  tem = op0;
2646  if (cmp_code != UNKNOWN)
2647    {
2648      tem = gen_reg_rtx (cmp_mode);
2649      emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2650    }
2651
2652  /* Emit the branch instruction.  */
2653  tem = gen_rtx_SET (VOIDmode, pc_rtx,
2654		     gen_rtx_IF_THEN_ELSE (VOIDmode,
2655					   gen_rtx_fmt_ee (branch_code,
2656							   branch_mode, tem,
2657							   CONST0_RTX (cmp_mode)),
2658					   gen_rtx_LABEL_REF (VOIDmode,
2659							      operands[3]),
2660					   pc_rtx));
2661  emit_jump_insn (tem);
2662}
2663
2664/* Certain simplifications can be done to make invalid setcc operations
   valid.  Return true if we emitted the setcc, false if we can't work.  */
2666
2667bool
2668alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2669{
2670  enum rtx_code cmp_code;
2671  enum rtx_code code = GET_CODE (operands[1]);
2672  rtx op0 = operands[2], op1 = operands[3];
2673  rtx tmp;
2674
2675  if (cmp_mode == TFmode)
2676    {
2677      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2678      op1 = const0_rtx;
2679      cmp_mode = DImode;
2680    }
2681
2682  if (cmp_mode == DFmode && !TARGET_FIX)
2683    return 0;
2684
2685  /* The general case: fold the comparison code to the types of compares
2686     that we have, choosing the branch as necessary.  */
2687
2688  cmp_code = UNKNOWN;
2689  switch (code)
2690    {
2691    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2692    case UNORDERED:
2693      /* We have these compares.  */
2694      if (cmp_mode == DFmode)
2695	cmp_code = code, code = NE;
2696      break;
2697
2698    case NE:
2699      if (cmp_mode == DImode && op1 == const0_rtx)
2700	break;
2701      /* FALLTHRU */
2702
2703    case ORDERED:
2704      cmp_code = reverse_condition (code);
2705      code = EQ;
2706      break;
2707
2708    case GE:  case GT: case GEU:  case GTU:
2709      /* These normally need swapping, but for integer zero we have
2710	 special patterns that recognize swapped operands.  */
2711      if (cmp_mode == DImode && op1 == const0_rtx)
2712	break;
2713      code = swap_condition (code);
2714      if (cmp_mode == DFmode)
2715	cmp_code = code, code = NE;
2716      std::swap (op0, op1);
2717      break;
2718
2719    default:
2720      gcc_unreachable ();
2721    }
2722
2723  if (cmp_mode == DImode)
2724    {
2725      if (!register_operand (op0, DImode))
2726	op0 = force_reg (DImode, op0);
2727      if (!reg_or_8bit_operand (op1, DImode))
2728	op1 = force_reg (DImode, op1);
2729    }
2730
2731  /* Emit an initial compare instruction, if necessary.  */
2732  if (cmp_code != UNKNOWN)
2733    {
2734      tmp = gen_reg_rtx (cmp_mode);
2735      emit_insn (gen_rtx_SET (VOIDmode, tmp,
2736			      gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2737
2738      op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2739      op1 = const0_rtx;
2740    }
2741
2742  /* Emit the setcc instruction.  */
2743  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2744			  gen_rtx_fmt_ee (code, DImode, op0, op1)));
2745  return true;
2746}
2747
2748
/* Rewrite a comparison CMP of the form (CODE op0 op1) so that it can be
   written validly in a conditional move (if_then_else CMP ...).
   If the comparison cannot appear directly in the conditional move, we
   must emit an insn to perform the compare and use its result instead.  */
2755
2756rtx
2757alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2758{
2759  enum rtx_code code = GET_CODE (cmp);
2760  enum rtx_code cmov_code = NE;
2761  rtx op0 = XEXP (cmp, 0);
2762  rtx op1 = XEXP (cmp, 1);
2763  machine_mode cmp_mode
2764    = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2765  machine_mode cmov_mode = VOIDmode;
2766  int local_fast_math = flag_unsafe_math_optimizations;
2767  rtx tem;
2768
2769  if (cmp_mode == TFmode)
2770    {
2771      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2772      op1 = const0_rtx;
2773      cmp_mode = DImode;
2774    }
2775
2776  gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2777
2778  if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2779    {
2780      enum rtx_code cmp_code;
2781
2782      if (! TARGET_FIX)
2783	return 0;
2784
2785      /* If we have fp<->int register move instructions, do a cmov by
2786	 performing the comparison in fp registers, and move the
2787	 zero/nonzero value to integer registers, where we can then
2788	 use a normal cmov, or vice-versa.  */
2789
2790      switch (code)
2791	{
2792	case EQ: case LE: case LT: case LEU: case LTU:
2793	case UNORDERED:
2794	  /* We have these compares.  */
2795	  cmp_code = code, code = NE;
2796	  break;
2797
2798	case NE:
2799	case ORDERED:
2800	  /* These must be reversed.  */
2801	  cmp_code = reverse_condition (code), code = EQ;
2802	  break;
2803
2804	case GE: case GT: case GEU: case GTU:
2805	  /* These normally need swapping, but for integer zero we have
2806	     special patterns that recognize swapped operands.  */
2807	  if (cmp_mode == DImode && op1 == const0_rtx)
2808	    cmp_code = code, code = NE;
2809	  else
2810	    {
2811	      cmp_code = swap_condition (code);
2812	      code = NE;
2813	      std::swap (op0, op1);
2814	    }
2815	  break;
2816
2817	default:
2818	  gcc_unreachable ();
2819	}
2820
2821      if (cmp_mode == DImode)
2822	{
2823	  if (!reg_or_0_operand (op0, DImode))
2824	    op0 = force_reg (DImode, op0);
2825	  if (!reg_or_8bit_operand (op1, DImode))
2826	    op1 = force_reg (DImode, op1);
2827	}
2828
2829      tem = gen_reg_rtx (cmp_mode);
2830      emit_insn (gen_rtx_SET (VOIDmode, tem,
2831			      gen_rtx_fmt_ee (cmp_code, cmp_mode,
2832					      op0, op1)));
2833
2834      cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2835      op0 = gen_lowpart (cmp_mode, tem);
2836      op1 = CONST0_RTX (cmp_mode);
2837      cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2838      local_fast_math = 1;
2839    }
2840
2841  if (cmp_mode == DImode)
2842    {
2843      if (!reg_or_0_operand (op0, DImode))
2844	op0 = force_reg (DImode, op0);
2845      if (!reg_or_8bit_operand (op1, DImode))
2846	op1 = force_reg (DImode, op1);
2847    }
2848
2849  /* We may be able to use a conditional move directly.
2850     This avoids emitting spurious compares.  */
2851  if (signed_comparison_operator (cmp, VOIDmode)
2852      && (cmp_mode == DImode || local_fast_math)
2853      && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2854    return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2855
2856  /* We can't put the comparison inside the conditional move;
2857     emit a compare instruction and put that inside the
2858     conditional move.  Make sure we emit only comparisons we have;
2859     swap or reverse as necessary.  */
2860
2861  if (!can_create_pseudo_p ())
2862    return NULL_RTX;
2863
2864  switch (code)
2865    {
2866    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2867    case UNORDERED:
2868      /* We have these compares: */
2869      break;
2870
2871    case NE:
2872    case ORDERED:
2873      /* These must be reversed.  */
2874      code = reverse_condition (code);
2875      cmov_code = EQ;
2876      break;
2877
2878    case GE:  case GT:  case GEU:  case GTU:
2879      /* These normally need swapping, but for integer zero we have
2880	 special patterns that recognize swapped operands.  */
2881      if (cmp_mode == DImode && op1 == const0_rtx)
2882	break;
2883      code = swap_condition (code);
2884      std::swap (op0, op1);
2885      break;
2886
2887    default:
2888      gcc_unreachable ();
2889    }
2890
2891  if (cmp_mode == DImode)
2892    {
2893      if (!reg_or_0_operand (op0, DImode))
2894	op0 = force_reg (DImode, op0);
2895      if (!reg_or_8bit_operand (op1, DImode))
2896	op1 = force_reg (DImode, op1);
2897    }
2898
2899  /* ??? We mark the branch mode to be CCmode to prevent the compare
2900     and cmov from being combined, since the compare insn follows IEEE
2901     rules that the cmov does not.  */
2902  if (cmp_mode == DFmode && !local_fast_math)
2903    cmov_mode = CCmode;
2904
2905  tem = gen_reg_rtx (cmp_mode);
2906  emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2907  return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2908}
2909
2910/* Simplify a conditional move of two constants into a setcc with
2911   arithmetic.  This is done with a splitter since combine would
2912   just undo the work if done during code generation.  It also catches
2913   cases we wouldn't have before cse.  */
2914
2915int
2916alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2917			      rtx t_rtx, rtx f_rtx)
2918{
2919  HOST_WIDE_INT t, f, diff;
2920  machine_mode mode;
2921  rtx target, subtarget, tmp;
2922
2923  mode = GET_MODE (dest);
2924  t = INTVAL (t_rtx);
2925  f = INTVAL (f_rtx);
2926  diff = t - f;
2927
2928  if (((code == NE || code == EQ) && diff < 0)
2929      || (code == GE || code == GT))
2930    {
2931      code = reverse_condition (code);
2932      diff = t, t = f, f = diff;
2933      diff = t - f;
2934    }
2935
2936  subtarget = target = dest;
2937  if (mode != DImode)
2938    {
2939      target = gen_lowpart (DImode, dest);
2940      if (can_create_pseudo_p ())
2941        subtarget = gen_reg_rtx (DImode);
2942      else
2943	subtarget = target;
2944    }
2945  /* Below, we must be careful to use copy_rtx on target and subtarget
2946     in intermediate insns, as they may be a subreg rtx, which may not
2947     be shared.  */
2948
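  /* Three shapes are handled below: F == 0 with T a power of two (setcc
     followed by a left shift), F == 0 with T == -1 (setcc followed by a
     negate), and T - F equal to 1, 4 or 8 (setcc followed by an add or a
     scaled add).  For anything else we give up and return 0.  */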
2949  if (f == 0 && exact_log2 (diff) > 0
2950      /* On EV6, we've got enough shifters to make non-arithmetic shifts
2951	 viable over a longer latency cmove.  On EV5, the E0 slot is a
2952	 scarce resource, and on EV4 shift has the same latency as a cmove.  */
2953      && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2954    {
2955      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2956      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2957
2958      tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2959			    GEN_INT (exact_log2 (t)));
2960      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2961    }
2962  else if (f == 0 && t == -1)
2963    {
2964      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2965      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2966
2967      emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2968    }
2969  else if (diff == 1 || diff == 4 || diff == 8)
2970    {
2971      rtx add_op;
2972
2973      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2974      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2975
2976      if (diff == 1)
2977	emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2978      else
2979	{
2980	  add_op = GEN_INT (f);
2981	  if (sext_add_operand (add_op, mode))
2982	    {
2983	      tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2984				  GEN_INT (diff));
2985	      tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2986	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2987	    }
2988	  else
2989	    return 0;
2990	}
2991    }
2992  else
2993    return 0;
2994
2995  return 1;
2996}
2997
2998/* Look up the function X_floating library function name for the
2999   given operation.  */
3000
3001struct GTY(()) xfloating_op
3002{
3003  const enum rtx_code code;
3004  const char *const GTY((skip)) osf_func;
3005  const char *const GTY((skip)) vms_func;
3006  rtx libcall;
3007};
3008
3009static GTY(()) struct xfloating_op xfloating_ops[] =
3010{
3011  { PLUS,		"_OtsAddX", "OTS$ADD_X", 0 },
3012  { MINUS,		"_OtsSubX", "OTS$SUB_X", 0 },
3013  { MULT,		"_OtsMulX", "OTS$MUL_X", 0 },
3014  { DIV,		"_OtsDivX", "OTS$DIV_X", 0 },
3015  { EQ,			"_OtsEqlX", "OTS$EQL_X", 0 },
3016  { NE,			"_OtsNeqX", "OTS$NEQ_X", 0 },
3017  { LT,			"_OtsLssX", "OTS$LSS_X", 0 },
3018  { LE,			"_OtsLeqX", "OTS$LEQ_X", 0 },
3019  { GT,			"_OtsGtrX", "OTS$GTR_X", 0 },
3020  { GE,			"_OtsGeqX", "OTS$GEQ_X", 0 },
3021  { FIX,		"_OtsCvtXQ", "OTS$CVTXQ", 0 },
3022  { FLOAT,		"_OtsCvtQX", "OTS$CVTQX", 0 },
3023  { UNSIGNED_FLOAT,	"_OtsCvtQUX", "OTS$CVTQUX", 0 },
3024  { FLOAT_EXTEND,	"_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
3025  { FLOAT_TRUNCATE,	"_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
3026};
3027
3028static GTY(()) struct xfloating_op vax_cvt_ops[] =
3029{
3030  { FLOAT_EXTEND,	"_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3031  { FLOAT_TRUNCATE,	"_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3032};
3033
3034static rtx
3035alpha_lookup_xfloating_lib_func (enum rtx_code code)
3036{
3037  struct xfloating_op *ops = xfloating_ops;
3038  long n = ARRAY_SIZE (xfloating_ops);
3039  long i;
3040
3041  gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3042
3043  /* How irritating.  Nothing to key off for the main table.  */
3044  if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3045    {
3046      ops = vax_cvt_ops;
3047      n = ARRAY_SIZE (vax_cvt_ops);
3048    }
3049
3050  for (i = 0; i < n; ++i, ++ops)
3051    if (ops->code == code)
3052      {
3053	rtx func = ops->libcall;
3054	if (!func)
3055	  {
3056	    func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3057				     ? ops->vms_func : ops->osf_func);
3058	    ops->libcall = func;
3059	  }
3060        return func;
3061      }
3062
3063  gcc_unreachable ();
3064}
3065
3066/* Most X_floating operations take the rounding mode as an argument.
3067   Compute that here.  */
3068
3069static int
3070alpha_compute_xfloating_mode_arg (enum rtx_code code,
3071				  enum alpha_fp_rounding_mode round)
3072{
3073  int mode;
3074
3075  switch (round)
3076    {
3077    case ALPHA_FPRM_NORM:
3078      mode = 2;
3079      break;
3080    case ALPHA_FPRM_MINF:
3081      mode = 1;
3082      break;
3083    case ALPHA_FPRM_CHOP:
3084      mode = 0;
3085      break;
3086    case ALPHA_FPRM_DYN:
3087      mode = 4;
3088      break;
3089    default:
3090      gcc_unreachable ();
3091
3092    /* XXX For reference, round to +inf is mode = 3.  */
3093    }
3094
3095  if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3096    mode |= 0x10000;
3097
3098  return mode;
3099}
3100
3101/* Emit an X_floating library function call.
3102
3103   Note that these functions do not follow normal calling conventions:
3104   TFmode arguments are passed in two integer registers (as opposed to
3105   indirect); TFmode return values appear in R16+R17.
3106
3107   FUNC is the function to call.
3108   TARGET is where the output belongs.
3109   OPERANDS are the inputs.
3110   NOPERANDS is the count of inputs.
3111   EQUIV is the expression equivalent for the function.
3112*/
3113
3114static void
3115alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3116			      int noperands, rtx equiv)
3117{
3118  rtx usage = NULL_RTX, tmp, reg;
3119  int regno = 16, i;
3120
3121  start_sequence ();
3122
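  /* Integer argument registers start at $16; FP registers are hard regs
     32 and up, so a DFmode argument in slot REGNO is passed in $fREGNO.
     A TFmode argument occupies two consecutive integer registers.  */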
3123  for (i = 0; i < noperands; ++i)
3124    {
3125      switch (GET_MODE (operands[i]))
3126	{
3127	case TFmode:
3128	  reg = gen_rtx_REG (TFmode, regno);
3129	  regno += 2;
3130	  break;
3131
3132	case DFmode:
3133	  reg = gen_rtx_REG (DFmode, regno + 32);
3134	  regno += 1;
3135	  break;
3136
3137	case VOIDmode:
3138	  gcc_assert (CONST_INT_P (operands[i]));
3139	  /* FALLTHRU */
3140	case DImode:
3141	  reg = gen_rtx_REG (DImode, regno);
3142	  regno += 1;
3143	  break;
3144
3145	default:
3146	  gcc_unreachable ();
3147	}
3148
3149      emit_move_insn (reg, operands[i]);
3150      use_reg (&usage, reg);
3151    }
3152
3153  switch (GET_MODE (target))
3154    {
3155    case TFmode:
3156      reg = gen_rtx_REG (TFmode, 16);
3157      break;
3158    case DFmode:
3159      reg = gen_rtx_REG (DFmode, 32);
3160      break;
3161    case DImode:
3162      reg = gen_rtx_REG (DImode, 0);
3163      break;
3164    default:
3165      gcc_unreachable ();
3166    }
3167
3168  tmp = gen_rtx_MEM (QImode, func);
3169  tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3170					const0_rtx, const0_rtx));
3171  CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3172  RTL_CONST_CALL_P (tmp) = 1;
3173
3174  tmp = get_insns ();
3175  end_sequence ();
3176
3177  emit_libcall_block (tmp, target, reg, equiv);
3178}
3179
3180/* Emit an X_floating library function call for arithmetic (+,-,*,/).  */
3181
3182void
3183alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3184{
3185  rtx func;
3186  int mode;
3187  rtx out_operands[3];
3188
3189  func = alpha_lookup_xfloating_lib_func (code);
3190  mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3191
3192  out_operands[0] = operands[1];
3193  out_operands[1] = operands[2];
3194  out_operands[2] = GEN_INT (mode);
3195  alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3196				gen_rtx_fmt_ee (code, TFmode, operands[1],
3197						operands[2]));
3198}
3199
3200/* Emit an X_floating library function call for a comparison.  */
3201
3202static rtx
3203alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3204{
3205  enum rtx_code cmp_code, res_code;
3206  rtx func, out, operands[2], note;
3207
3208  /* X_floating library comparison functions return
3209	   -1  unordered
3210	    0  false
3211	    1  true
3212     Convert the compare against the raw return value.  */
3213
3214  cmp_code = *pcode;
3215  switch (cmp_code)
3216    {
3217    case UNORDERED:
3218      cmp_code = EQ;
3219      res_code = LT;
3220      break;
3221    case ORDERED:
3222      cmp_code = EQ;
3223      res_code = GE;
3224      break;
3225    case NE:
3226      res_code = NE;
3227      break;
3228    case EQ:
3229    case LT:
3230    case GT:
3231    case LE:
3232    case GE:
3233      res_code = GT;
3234      break;
3235    default:
3236      gcc_unreachable ();
3237    }
3238  *pcode = res_code;
3239
3240  func = alpha_lookup_xfloating_lib_func (cmp_code);
3241
3242  operands[0] = op0;
3243  operands[1] = op1;
3244  out = gen_reg_rtx (DImode);
3245
3246  /* What's actually returned is -1,0,1, not a proper boolean value.  */
3247  note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3248  note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3249  alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3250
3251  return out;
3252}
3253
3254/* Emit an X_floating library function call for a conversion.  */
3255
3256void
3257alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3258{
3259  int noperands = 1, mode;
3260  rtx out_operands[2];
3261  rtx func;
3262  enum rtx_code code = orig_code;
3263
3264  if (code == UNSIGNED_FIX)
3265    code = FIX;
3266
3267  func = alpha_lookup_xfloating_lib_func (code);
3268
3269  out_operands[0] = operands[1];
3270
3271  switch (code)
3272    {
3273    case FIX:
3274      mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3275      out_operands[1] = GEN_INT (mode);
3276      noperands = 2;
3277      break;
3278    case FLOAT_TRUNCATE:
3279      mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3280      out_operands[1] = GEN_INT (mode);
3281      noperands = 2;
3282      break;
3283    default:
3284      break;
3285    }
3286
3287  alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3288				gen_rtx_fmt_e (orig_code,
3289					       GET_MODE (operands[0]),
3290					       operands[1]));
3291}
3292
3293/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3294   DImode moves from OP[2,3] to OP[0,1].  If FIXUP_OVERLAP is true,
3295   guarantee that the sequence
3296     set (OP[0] OP[2])
3297     set (OP[1] OP[3])
3298   is valid.  Naturally, output operand ordering is little-endian.
3299   This is used by *movtf_internal and *movti_internal.  */
3300
3301void
3302alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3303			bool fixup_overlap)
3304{
3305  switch (GET_CODE (operands[1]))
3306    {
3307    case REG:
3308      operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3309      operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3310      break;
3311
3312    case MEM:
3313      operands[3] = adjust_address (operands[1], DImode, 8);
3314      operands[2] = adjust_address (operands[1], DImode, 0);
3315      break;
3316
3317    case CONST_INT:
3318    case CONST_DOUBLE:
3319      gcc_assert (operands[1] == CONST0_RTX (mode));
3320      operands[2] = operands[3] = const0_rtx;
3321      break;
3322
3323    default:
3324      gcc_unreachable ();
3325    }
3326
3327  switch (GET_CODE (operands[0]))
3328    {
3329    case REG:
3330      operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3331      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3332      break;
3333
3334    case MEM:
3335      operands[1] = adjust_address (operands[0], DImode, 8);
3336      operands[0] = adjust_address (operands[0], DImode, 0);
3337      break;
3338
3339    default:
3340      gcc_unreachable ();
3341    }
3342
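  /* If the first move would clobber a register that the second move still
     needs as its source, emit the two moves in the opposite order.  */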
3343  if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3344    {
3345      std::swap (operands[0], operands[1]);
3346      std::swap (operands[2], operands[3]);
3347    }
3348}
3349
3350/* Implement negtf2 or abstf2.  Op0 is destination, op1 is source,
3351   op2 is a register containing the sign bit, operation is the
3352   logical operation to be performed.  */
3353
3354void
3355alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3356{
3357  rtx high_bit = operands[2];
3358  rtx scratch;
3359  int move;
3360
3361  alpha_split_tmode_pair (operands, TFmode, false);
3362
3363  /* Detect three flavors of operand overlap.  */
3364  move = 1;
3365  if (rtx_equal_p (operands[0], operands[2]))
3366    move = 0;
3367  else if (rtx_equal_p (operands[1], operands[2]))
3368    {
3369      if (rtx_equal_p (operands[0], high_bit))
3370	move = 2;
3371      else
3372	move = -1;
3373    }
3374
3375  if (move < 0)
3376    emit_move_insn (operands[0], operands[2]);
3377
3378  /* ??? If the destination overlaps both source tf and high_bit, then
3379     assume source tf is dead in its entirety and use the other half
3380     for a scratch register.  Otherwise "scratch" is just the proper
3381     destination register.  */
3382  scratch = operands[move < 2 ? 1 : 3];
3383
3384  emit_insn ((*operation) (scratch, high_bit, operands[3]));
3385
3386  if (move > 0)
3387    {
3388      emit_move_insn (operands[0], operands[2]);
3389      if (move > 1)
3390	emit_move_insn (operands[1], scratch);
3391    }
3392}
3393
3394/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3395   unaligned data:
3396
3397           unsigned:                       signed:
3398   word:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3399           ldq_u  r2,X+1(r11)              ldq_u  r2,X+1(r11)
3400           lda    r3,X(r11)                lda    r3,X+2(r11)
3401           extwl  r1,r3,r1                 extql  r1,r3,r1
3402           extwh  r2,r3,r2                 extqh  r2,r3,r2
           or     r1,r2,r1                 or     r1,r2,r1
3404                                           sra    r1,48,r1
3405
3406   long:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3407           ldq_u  r2,X+3(r11)              ldq_u  r2,X+3(r11)
3408           lda    r3,X(r11)                lda    r3,X(r11)
3409           extll  r1,r3,r1                 extll  r1,r3,r1
3410           extlh  r2,r3,r2                 extlh  r2,r3,r2
           or     r1,r2,r1                 addl   r1,r2,r1
3412
3413   quad:   ldq_u  r1,X(r11)
3414           ldq_u  r2,X+7(r11)
3415           lda    r3,X(r11)
3416           extql  r1,r3,r1
3417           extqh  r2,r3,r2
           or     r1,r2,r1
3419*/
3420
3421void
3422alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3423			     HOST_WIDE_INT ofs, int sign)
3424{
3425  rtx meml, memh, addr, extl, exth, tmp, mema;
3426  machine_mode mode;
3427
3428  if (TARGET_BWX && size == 2)
3429    {
3430      meml = adjust_address (mem, QImode, ofs);
3431      memh = adjust_address (mem, QImode, ofs+1);
3432      extl = gen_reg_rtx (DImode);
3433      exth = gen_reg_rtx (DImode);
3434      emit_insn (gen_zero_extendqidi2 (extl, meml));
3435      emit_insn (gen_zero_extendqidi2 (exth, memh));
3436      exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3437				  NULL, 1, OPTAB_LIB_WIDEN);
3438      addr = expand_simple_binop (DImode, IOR, extl, exth,
3439				  NULL, 1, OPTAB_LIB_WIDEN);
3440
3441      if (sign && GET_MODE (tgt) != HImode)
3442	{
3443	  addr = gen_lowpart (HImode, addr);
3444	  emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3445	}
3446      else
3447	{
3448	  if (GET_MODE (tgt) != DImode)
3449	    addr = gen_lowpart (GET_MODE (tgt), addr);
3450	  emit_move_insn (tgt, addr);
3451	}
3452      return;
3453    }
3454
3455  meml = gen_reg_rtx (DImode);
3456  memh = gen_reg_rtx (DImode);
3457  addr = gen_reg_rtx (DImode);
3458  extl = gen_reg_rtx (DImode);
3459  exth = gen_reg_rtx (DImode);
3460
3461  mema = XEXP (mem, 0);
3462  if (GET_CODE (mema) == LO_SUM)
3463    mema = force_reg (Pmode, mema);
3464
3465  /* AND addresses cannot be in any alias set, since they may implicitly
3466     alias surrounding code.  Ideally we'd have some alias set that
3467     covered all types except those with alignment 8 or higher.  */
3468
3469  tmp = change_address (mem, DImode,
3470			gen_rtx_AND (DImode,
3471				     plus_constant (DImode, mema, ofs),
3472				     GEN_INT (-8)));
3473  set_mem_alias_set (tmp, 0);
3474  emit_move_insn (meml, tmp);
3475
3476  tmp = change_address (mem, DImode,
3477			gen_rtx_AND (DImode,
3478				     plus_constant (DImode, mema,
3479						    ofs + size - 1),
3480				     GEN_INT (-8)));
3481  set_mem_alias_set (tmp, 0);
3482  emit_move_insn (memh, tmp);
3483
3484  if (sign && size == 2)
3485    {
3486      emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3487
3488      emit_insn (gen_extql (extl, meml, addr));
3489      emit_insn (gen_extqh (exth, memh, addr));
3490
3491      /* We must use tgt here for the target.  Alpha-vms port fails if we use
3492	 addr for the target, because addr is marked as a pointer and combine
3493	 knows that pointers are always sign-extended 32-bit values.  */
3494      addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3495      addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3496			   addr, 1, OPTAB_WIDEN);
3497    }
3498  else
3499    {
3500      emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3501      emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3502      switch ((int) size)
3503	{
3504	case 2:
3505	  emit_insn (gen_extwh (exth, memh, addr));
3506	  mode = HImode;
3507	  break;
3508	case 4:
3509	  emit_insn (gen_extlh (exth, memh, addr));
3510	  mode = SImode;
3511	  break;
3512	case 8:
3513	  emit_insn (gen_extqh (exth, memh, addr));
3514	  mode = DImode;
3515	  break;
3516	default:
3517	  gcc_unreachable ();
3518	}
3519
3520      addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3521			   gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3522			   sign, OPTAB_WIDEN);
3523    }
3524
3525  if (addr != tgt)
3526    emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3527}
3528
3529/* Similarly, use ins and msk instructions to perform unaligned stores.  */
3530
3531void
3532alpha_expand_unaligned_store (rtx dst, rtx src,
3533			      HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3534{
3535  rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3536
3537  if (TARGET_BWX && size == 2)
3538    {
3539      if (src != const0_rtx)
3540	{
3541	  dstl = gen_lowpart (QImode, src);
3542	  dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3543				      NULL, 1, OPTAB_LIB_WIDEN);
3544	  dsth = gen_lowpart (QImode, dsth);
3545	}
3546      else
3547	dstl = dsth = const0_rtx;
3548
3549      meml = adjust_address (dst, QImode, ofs);
3550      memh = adjust_address (dst, QImode, ofs+1);
3551
3552      emit_move_insn (meml, dstl);
3553      emit_move_insn (memh, dsth);
3554      return;
3555    }
3556
3557  dstl = gen_reg_rtx (DImode);
3558  dsth = gen_reg_rtx (DImode);
3559  insl = gen_reg_rtx (DImode);
3560  insh = gen_reg_rtx (DImode);
3561
3562  dsta = XEXP (dst, 0);
3563  if (GET_CODE (dsta) == LO_SUM)
3564    dsta = force_reg (Pmode, dsta);
3565
3566  /* AND addresses cannot be in any alias set, since they may implicitly
3567     alias surrounding code.  Ideally we'd have some alias set that
3568     covered all types except those with alignment 8 or higher.  */
3569
3570  meml = change_address (dst, DImode,
3571			 gen_rtx_AND (DImode,
3572				      plus_constant (DImode, dsta, ofs),
3573				      GEN_INT (-8)));
3574  set_mem_alias_set (meml, 0);
3575
3576  memh = change_address (dst, DImode,
3577			 gen_rtx_AND (DImode,
3578				      plus_constant (DImode, dsta,
3579						     ofs + size - 1),
3580				      GEN_INT (-8)));
3581  set_mem_alias_set (memh, 0);
3582
3583  emit_move_insn (dsth, memh);
3584  emit_move_insn (dstl, meml);
3585
3586  addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3587
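  /* Position the source bytes with ins*, knock the corresponding bytes out
     of the loaded quadwords with msk*, then OR the pieces back together.
     When storing zero, the insert step is skipped and the masking alone
     does the job.  */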
3588  if (src != CONST0_RTX (GET_MODE (src)))
3589    {
3590      emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3591			    GEN_INT (size*8), addr));
3592
3593      switch ((int) size)
3594	{
3595	case 2:
3596	  emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3597	  break;
3598	case 4:
3599	  emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3600	  break;
3601	case 8:
3602	  emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3603	  break;
3604	default:
3605	  gcc_unreachable ();
3606	}
3607    }
3608
3609  emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3610
3611  switch ((int) size)
3612    {
3613    case 2:
3614      emit_insn (gen_mskwl (dstl, dstl, addr));
3615      break;
3616    case 4:
3617      emit_insn (gen_mskll (dstl, dstl, addr));
3618      break;
3619    case 8:
3620      emit_insn (gen_mskql (dstl, dstl, addr));
3621      break;
3622    default:
3623      gcc_unreachable ();
3624    }
3625
3626  if (src != CONST0_RTX (GET_MODE (src)))
3627    {
3628      dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3629      dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3630    }
3631
3632  /* Must store high before low for degenerate case of aligned.  */
3633  emit_move_insn (memh, dsth);
3634  emit_move_insn (meml, dstl);
3635}
3636
3637/* The block move code tries to maximize speed by separating loads and
3638   stores at the expense of register pressure: we load all of the data
3639   before we store it back out.  There are two secondary effects worth
   mentioning: this speeds copying to/from aligned and unaligned
   buffers, and it makes the code significantly easier to write.  */
3642
3643#define MAX_MOVE_WORDS	8
3644
3645/* Load an integral number of consecutive unaligned quadwords.  */
3646
3647static void
3648alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3649				   HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3650{
3651  rtx const im8 = GEN_INT (-8);
3652  rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3653  rtx sreg, areg, tmp, smema;
3654  HOST_WIDE_INT i;
3655
3656  smema = XEXP (smem, 0);
3657  if (GET_CODE (smema) == LO_SUM)
3658    smema = force_reg (Pmode, smema);
3659
3660  /* Generate all the tmp registers we need.  */
3661  for (i = 0; i < words; ++i)
3662    {
3663      data_regs[i] = out_regs[i];
3664      ext_tmps[i] = gen_reg_rtx (DImode);
3665    }
3666  data_regs[words] = gen_reg_rtx (DImode);
3667
3668  if (ofs != 0)
3669    smem = adjust_address (smem, GET_MODE (smem), ofs);
3670
3671  /* Load up all of the source data.  */
3672  for (i = 0; i < words; ++i)
3673    {
3674      tmp = change_address (smem, DImode,
3675			    gen_rtx_AND (DImode,
3676					 plus_constant (DImode, smema, 8*i),
3677					 im8));
3678      set_mem_alias_set (tmp, 0);
3679      emit_move_insn (data_regs[i], tmp);
3680    }
3681
3682  tmp = change_address (smem, DImode,
3683			gen_rtx_AND (DImode,
3684				     plus_constant (DImode, smema,
3685						    8*words - 1),
3686				     im8));
3687  set_mem_alias_set (tmp, 0);
3688  emit_move_insn (data_regs[words], tmp);
3689
3690  /* Extract the half-word fragments.  Unfortunately DEC decided to make
3691     extxh with offset zero a noop instead of zeroing the register, so
3692     we must take care of that edge condition ourselves with cmov.  */
3693
3694  sreg = copy_addr_to_reg (smema);
3695  areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3696		       1, OPTAB_WIDEN);
3697  for (i = 0; i < words; ++i)
3698    {
3699      emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3700      emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3701      emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3702			      gen_rtx_IF_THEN_ELSE (DImode,
3703						    gen_rtx_EQ (DImode, areg,
3704								const0_rtx),
3705						    const0_rtx, ext_tmps[i])));
3706    }
3707
3708  /* Merge the half-words into whole words.  */
3709  for (i = 0; i < words; ++i)
3710    {
3711      out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3712				  ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3713    }
3714}
3715
3716/* Store an integral number of consecutive unaligned quadwords.  DATA_REGS
3717   may be NULL to store zeros.  */
3718
3719static void
3720alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3721				    HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3722{
3723  rtx const im8 = GEN_INT (-8);
3724  rtx ins_tmps[MAX_MOVE_WORDS];
3725  rtx st_tmp_1, st_tmp_2, dreg;
3726  rtx st_addr_1, st_addr_2, dmema;
3727  HOST_WIDE_INT i;
3728
3729  dmema = XEXP (dmem, 0);
3730  if (GET_CODE (dmema) == LO_SUM)
3731    dmema = force_reg (Pmode, dmema);
3732
3733  /* Generate all the tmp registers we need.  */
3734  if (data_regs != NULL)
3735    for (i = 0; i < words; ++i)
3736      ins_tmps[i] = gen_reg_rtx(DImode);
3737  st_tmp_1 = gen_reg_rtx(DImode);
3738  st_tmp_2 = gen_reg_rtx(DImode);
3739
3740  if (ofs != 0)
3741    dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3742
3743  st_addr_2 = change_address (dmem, DImode,
3744			      gen_rtx_AND (DImode,
3745					   plus_constant (DImode, dmema,
3746							  words*8 - 1),
3747					   im8));
3748  set_mem_alias_set (st_addr_2, 0);
3749
3750  st_addr_1 = change_address (dmem, DImode,
3751			      gen_rtx_AND (DImode, dmema, im8));
3752  set_mem_alias_set (st_addr_1, 0);
3753
3754  /* Load up the destination end bits.  */
3755  emit_move_insn (st_tmp_2, st_addr_2);
3756  emit_move_insn (st_tmp_1, st_addr_1);
3757
3758  /* Shift the input data into place.  */
3759  dreg = copy_addr_to_reg (dmema);
3760  if (data_regs != NULL)
3761    {
3762      for (i = words-1; i >= 0; --i)
3763	{
3764	  emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3765	  emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3766	}
3767      for (i = words-1; i > 0; --i)
3768	{
3769	  ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3770					ins_tmps[i-1], ins_tmps[i-1], 1,
3771					OPTAB_WIDEN);
3772	}
3773    }
3774
3775  /* Split and merge the ends with the destination data.  */
3776  emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3777  emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3778
3779  if (data_regs != NULL)
3780    {
3781      st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3782			       st_tmp_2, 1, OPTAB_WIDEN);
3783      st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3784			       st_tmp_1, 1, OPTAB_WIDEN);
3785    }
3786
3787  /* Store it all.  */
3788  emit_move_insn (st_addr_2, st_tmp_2);
3789  for (i = words-1; i > 0; --i)
3790    {
3791      rtx tmp = change_address (dmem, DImode,
3792				gen_rtx_AND (DImode,
3793					     plus_constant (DImode,
3794							    dmema, i*8),
3795					     im8));
3796      set_mem_alias_set (tmp, 0);
3797      emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3798    }
3799  emit_move_insn (st_addr_1, st_tmp_1);
3800}
3801
3802
3803/* Expand string/block move operations.
3804
3805   operands[0] is the pointer to the destination.
3806   operands[1] is the pointer to the source.
3807   operands[2] is the number of bytes to move.
3808   operands[3] is the alignment.  */
3809
3810int
3811alpha_expand_block_move (rtx operands[])
3812{
3813  rtx bytes_rtx	= operands[2];
3814  rtx align_rtx = operands[3];
3815  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3816  HOST_WIDE_INT bytes = orig_bytes;
3817  HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3818  HOST_WIDE_INT dst_align = src_align;
3819  rtx orig_src = operands[1];
3820  rtx orig_dst = operands[0];
3821  rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3822  rtx tmp;
3823  unsigned int i, words, ofs, nregs = 0;
3824
3825  if (orig_bytes <= 0)
3826    return 1;
3827  else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3828    return 0;
3829
3830  /* Look for additional alignment information from recorded register info.  */
3831
3832  tmp = XEXP (orig_src, 0);
3833  if (REG_P (tmp))
3834    src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3835  else if (GET_CODE (tmp) == PLUS
3836	   && REG_P (XEXP (tmp, 0))
3837	   && CONST_INT_P (XEXP (tmp, 1)))
3838    {
3839      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3840      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3841
3842      if (a > src_align)
3843	{
3844          if (a >= 64 && c % 8 == 0)
3845	    src_align = 64;
3846          else if (a >= 32 && c % 4 == 0)
3847	    src_align = 32;
3848          else if (a >= 16 && c % 2 == 0)
3849	    src_align = 16;
3850	}
3851    }
3852
3853  tmp = XEXP (orig_dst, 0);
3854  if (REG_P (tmp))
3855    dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3856  else if (GET_CODE (tmp) == PLUS
3857	   && REG_P (XEXP (tmp, 0))
3858	   && CONST_INT_P (XEXP (tmp, 1)))
3859    {
3860      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3861      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3862
3863      if (a > dst_align)
3864	{
3865          if (a >= 64 && c % 8 == 0)
3866	    dst_align = 64;
3867          else if (a >= 32 && c % 4 == 0)
3868	    dst_align = 32;
3869          else if (a >= 16 && c % 2 == 0)
3870	    dst_align = 16;
3871	}
3872    }
3873
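  /* First read the source into registers, using the widest accesses the
     source alignment allows.  */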
3874  ofs = 0;
3875  if (src_align >= 64 && bytes >= 8)
3876    {
3877      words = bytes / 8;
3878
3879      for (i = 0; i < words; ++i)
3880	data_regs[nregs + i] = gen_reg_rtx (DImode);
3881
3882      for (i = 0; i < words; ++i)
3883	emit_move_insn (data_regs[nregs + i],
3884			adjust_address (orig_src, DImode, ofs + i * 8));
3885
3886      nregs += words;
3887      bytes -= words * 8;
3888      ofs += words * 8;
3889    }
3890
3891  if (src_align >= 32 && bytes >= 4)
3892    {
3893      words = bytes / 4;
3894
3895      for (i = 0; i < words; ++i)
3896	data_regs[nregs + i] = gen_reg_rtx (SImode);
3897
3898      for (i = 0; i < words; ++i)
3899	emit_move_insn (data_regs[nregs + i],
3900			adjust_address (orig_src, SImode, ofs + i * 4));
3901
3902      nregs += words;
3903      bytes -= words * 4;
3904      ofs += words * 4;
3905    }
3906
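  /* Load any remaining multiple-of-eight chunk with the unaligned
     quadword helper.  */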
3907  if (bytes >= 8)
3908    {
3909      words = bytes / 8;
3910
3911      for (i = 0; i < words+1; ++i)
3912	data_regs[nregs + i] = gen_reg_rtx (DImode);
3913
3914      alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3915					 words, ofs);
3916
3917      nregs += words;
3918      bytes -= words * 8;
3919      ofs += words * 8;
3920    }
3921
3922  if (! TARGET_BWX && bytes >= 4)
3923    {
3924      data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3925      alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3926      bytes -= 4;
3927      ofs += 4;
3928    }
3929
3930  if (bytes >= 2)
3931    {
3932      if (src_align >= 16)
3933	{
3934	  do {
3935	    data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3936	    emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3937	    bytes -= 2;
3938	    ofs += 2;
3939	  } while (bytes >= 2);
3940	}
3941      else if (! TARGET_BWX)
3942	{
3943	  data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3944	  alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3945	  bytes -= 2;
3946	  ofs += 2;
3947	}
3948    }
3949
3950  while (bytes > 0)
3951    {
3952      data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3953      emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3954      bytes -= 1;
3955      ofs += 1;
3956    }
3957
3958  gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3959
3960  /* Now save it back out again.  */
3961
3962  i = 0, ofs = 0;
3963
3964  /* Write out the data in whatever chunks reading the source allowed.  */
3965  if (dst_align >= 64)
3966    {
3967      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3968	{
3969	  emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3970			  data_regs[i]);
3971	  ofs += 8;
3972	  i++;
3973	}
3974    }
3975
3976  if (dst_align >= 32)
3977    {
3978      /* If the source has remaining DImode regs, write them out in
3979	 two pieces.  */
3980      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3981	{
3982	  tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3983			      NULL_RTX, 1, OPTAB_WIDEN);
3984
3985	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3986			  gen_lowpart (SImode, data_regs[i]));
3987	  emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3988			  gen_lowpart (SImode, tmp));
3989	  ofs += 8;
3990	  i++;
3991	}
3992
3993      while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3994	{
3995	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3996			  data_regs[i]);
3997	  ofs += 4;
3998	  i++;
3999	}
4000    }
4001
4002  if (i < nregs && GET_MODE (data_regs[i]) == DImode)
4003    {
4004      /* Write out a remaining block of words using unaligned methods.  */
4005
4006      for (words = 1; i + words < nregs; words++)
4007	if (GET_MODE (data_regs[i + words]) != DImode)
4008	  break;
4009
4010      if (words == 1)
4011	alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
4012      else
4013        alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
4014					    words, ofs);
4015
4016      i += words;
4017      ofs += words * 8;
4018    }
4019
4020  /* Due to the above, this won't be aligned.  */
4021  /* ??? If we have more than one of these, consider constructing full
4022     words in registers and using alpha_expand_unaligned_store_words.  */
4023  while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4024    {
4025      alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
4026      ofs += 4;
4027      i++;
4028    }
4029
4030  if (dst_align >= 16)
4031    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4032      {
4033	emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4034	i++;
4035	ofs += 2;
4036      }
4037  else
4038    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4039      {
4040	alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4041	i++;
4042	ofs += 2;
4043      }
4044
4045  /* The remainder must be byte copies.  */
4046  while (i < nregs)
4047    {
4048      gcc_assert (GET_MODE (data_regs[i]) == QImode);
4049      emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4050      i++;
4051      ofs += 1;
4052    }
4053
4054  return 1;
4055}
4056
4057int
4058alpha_expand_block_clear (rtx operands[])
4059{
4060  rtx bytes_rtx	= operands[1];
4061  rtx align_rtx = operands[3];
4062  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4063  HOST_WIDE_INT bytes = orig_bytes;
4064  HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4065  HOST_WIDE_INT alignofs = 0;
4066  rtx orig_dst = operands[0];
4067  rtx tmp;
4068  int i, words, ofs = 0;
4069
4070  if (orig_bytes <= 0)
4071    return 1;
4072  if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4073    return 0;
4074
4075  /* Look for stricter alignment.  */
4076  tmp = XEXP (orig_dst, 0);
4077  if (REG_P (tmp))
4078    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4079  else if (GET_CODE (tmp) == PLUS
4080	   && REG_P (XEXP (tmp, 0))
4081	   && CONST_INT_P (XEXP (tmp, 1)))
4082    {
4083      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4084      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4085
4086      if (a > align)
4087	{
4088          if (a >= 64)
4089	    align = a, alignofs = 8 - c % 8;
4090          else if (a >= 32)
4091	    align = a, alignofs = 4 - c % 4;
4092          else if (a >= 16)
4093	    align = a, alignofs = 2 - c % 2;
4094	}
4095    }
4096
4097  /* Handle an unaligned prefix first.  */
4098
4099  if (alignofs > 0)
4100    {
4101#if HOST_BITS_PER_WIDE_INT >= 64
4102      /* Given that alignofs is bounded by align, the only time BWX could
4103	 generate three stores is for a 7 byte fill.  Prefer two individual
4104	 stores over a load/mask/store sequence.  */
4105      if ((!TARGET_BWX || alignofs == 7)
4106	       && align >= 32
4107	       && !(alignofs == 4 && bytes >= 4))
4108	{
4109	  machine_mode mode = (align >= 64 ? DImode : SImode);
4110	  int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4111	  rtx mem, tmp;
4112	  HOST_WIDE_INT mask;
4113
4114	  mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4115	  set_mem_alias_set (mem, 0);
4116
4117	  mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
4118	  if (bytes < alignofs)
4119	    {
4120	      mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
4121	      ofs += bytes;
4122	      bytes = 0;
4123	    }
4124	  else
4125	    {
4126	      bytes -= alignofs;
4127	      ofs += alignofs;
4128	    }
4129	  alignofs = 0;
4130
4131	  tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4132			      NULL_RTX, 1, OPTAB_WIDEN);
4133
4134	  emit_move_insn (mem, tmp);
4135	}
4136#endif
4137
4138      if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4139	{
4140	  emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4141	  bytes -= 1;
4142	  ofs += 1;
4143	  alignofs -= 1;
4144	}
4145      if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4146	{
4147	  emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4148	  bytes -= 2;
4149	  ofs += 2;
4150	  alignofs -= 2;
4151	}
4152      if (alignofs == 4 && bytes >= 4)
4153	{
4154	  emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4155	  bytes -= 4;
4156	  ofs += 4;
4157	  alignofs = 0;
4158	}
4159
4160      /* If we've not used the extra lead alignment information by now,
4161	 we won't be able to.  Downgrade align to match what's left over.  */
4162      if (alignofs > 0)
4163	{
4164	  alignofs = alignofs & -alignofs;
4165	  align = MIN (align, alignofs * BITS_PER_UNIT);
4166	}
4167    }
4168
4169  /* Handle a block of contiguous long-words.  */
4170
4171  if (align >= 64 && bytes >= 8)
4172    {
4173      words = bytes / 8;
4174
4175      for (i = 0; i < words; ++i)
4176	emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4177			const0_rtx);
4178
4179      bytes -= words * 8;
4180      ofs += words * 8;
4181    }
4182
4183  /* If the block is large and appropriately aligned, emit a single
4184     store followed by a sequence of stq_u insns.  */
4185
4186  if (align >= 32 && bytes > 16)
4187    {
4188      rtx orig_dsta;
4189
4190      emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4191      bytes -= 4;
4192      ofs += 4;
4193
4194      orig_dsta = XEXP (orig_dst, 0);
4195      if (GET_CODE (orig_dsta) == LO_SUM)
4196	orig_dsta = force_reg (Pmode, orig_dsta);
4197
4198      words = bytes / 8;
4199      for (i = 0; i < words; ++i)
4200	{
4201	  rtx mem
4202	    = change_address (orig_dst, DImode,
4203			      gen_rtx_AND (DImode,
4204					   plus_constant (DImode, orig_dsta,
4205							  ofs + i*8),
4206					   GEN_INT (-8)));
4207	  set_mem_alias_set (mem, 0);
4208	  emit_move_insn (mem, const0_rtx);
4209	}
4210
4211      /* Depending on the alignment, the first stq_u may have overlapped
4212	 with the initial stl, which means that the last stq_u didn't
4213	 write as much as it would appear.  Leave those questionable bytes
4214	 unaccounted for.  */
4215      bytes -= words * 8 - 4;
4216      ofs += words * 8 - 4;
4217    }
4218
4219  /* Handle a smaller block of aligned words.  */
4220
4221  if ((align >= 64 && bytes == 4)
4222      || (align == 32 && bytes >= 4))
4223    {
4224      words = bytes / 4;
4225
4226      for (i = 0; i < words; ++i)
4227	emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4228			const0_rtx);
4229
4230      bytes -= words * 4;
4231      ofs += words * 4;
4232    }
4233
4234  /* An unaligned block uses stq_u stores for as many as possible.  */
4235
4236  if (bytes >= 8)
4237    {
4238      words = bytes / 8;
4239
4240      alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4241
4242      bytes -= words * 8;
4243      ofs += words * 8;
4244    }
4245
4246  /* Next clean up any trailing pieces.  */
4247
4248#if HOST_BITS_PER_WIDE_INT >= 64
4249  /* Count the number of bits in BYTES for which aligned stores could
4250     be emitted.  */
4251  words = 0;
4252  for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4253    if (bytes & i)
4254      words += 1;
4255
4256  /* If we have appropriate alignment (and it wouldn't take too many
4257     instructions otherwise), mask out the bytes we need.  */
4258  if (TARGET_BWX ? words > 2 : bytes > 0)
4259    {
4260      if (align >= 64)
4261	{
4262	  rtx mem, tmp;
4263	  HOST_WIDE_INT mask;
4264
4265	  mem = adjust_address (orig_dst, DImode, ofs);
4266	  set_mem_alias_set (mem, 0);
4267
4268	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4269
4270	  tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4271			      NULL_RTX, 1, OPTAB_WIDEN);
4272
4273	  emit_move_insn (mem, tmp);
4274	  return 1;
4275	}
4276      else if (align >= 32 && bytes < 4)
4277	{
4278	  rtx mem, tmp;
4279	  HOST_WIDE_INT mask;
4280
4281	  mem = adjust_address (orig_dst, SImode, ofs);
4282	  set_mem_alias_set (mem, 0);
4283
4284	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4285
4286	  tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4287			      NULL_RTX, 1, OPTAB_WIDEN);
4288
4289	  emit_move_insn (mem, tmp);
4290	  return 1;
4291	}
4292    }
4293#endif
4294
4295  if (!TARGET_BWX && bytes >= 4)
4296    {
4297      alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4298      bytes -= 4;
4299      ofs += 4;
4300    }
4301
4302  if (bytes >= 2)
4303    {
4304      if (align >= 16)
4305	{
4306	  do {
4307	    emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4308			    const0_rtx);
4309	    bytes -= 2;
4310	    ofs += 2;
4311	  } while (bytes >= 2);
4312	}
4313      else if (! TARGET_BWX)
4314	{
4315	  alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4316	  bytes -= 2;
4317	  ofs += 2;
4318	}
4319    }
4320
4321  while (bytes > 0)
4322    {
4323      emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4324      bytes -= 1;
4325      ofs += 1;
4326    }
4327
4328  return 1;
4329}
4330
4331/* Returns a mask so that zap(x, value) == x & mask.  */
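/* zap clears each byte of its first operand whose corresponding bit in the
   second operand is set, so the equivalent AND mask has 0x00 in those byte
   positions and 0xff elsewhere.  */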
4332
4333rtx
4334alpha_expand_zap_mask (HOST_WIDE_INT value)
4335{
4336  rtx result;
4337  int i;
4338
4339  if (HOST_BITS_PER_WIDE_INT >= 64)
4340    {
4341      HOST_WIDE_INT mask = 0;
4342
4343      for (i = 7; i >= 0; --i)
4344	{
4345	  mask <<= 8;
4346	  if (!((value >> i) & 1))
4347	    mask |= 0xff;
4348	}
4349
4350      result = gen_int_mode (mask, DImode);
4351    }
4352  else
4353    {
4354      HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4355
4356      gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4357
4358      for (i = 7; i >= 4; --i)
4359	{
4360	  mask_hi <<= 8;
4361	  if (!((value >> i) & 1))
4362	    mask_hi |= 0xff;
4363	}
4364
4365      for (i = 3; i >= 0; --i)
4366	{
4367	  mask_lo <<= 8;
4368	  if (!((value >> i) & 1))
4369	    mask_lo |= 0xff;
4370	}
4371
4372      result = immed_double_const (mask_lo, mask_hi, DImode);
4373    }
4374
4375  return result;
4376}
4377
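/* Expand a vector builtin: narrow the operands to MODE, canonicalize integer
   zeros to the vector zero of that mode, and emit the pattern GEN.  */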
4378void
4379alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4380				   machine_mode mode,
4381				   rtx op0, rtx op1, rtx op2)
4382{
4383  op0 = gen_lowpart (mode, op0);
4384
4385  if (op1 == const0_rtx)
4386    op1 = CONST0_RTX (mode);
4387  else
4388    op1 = gen_lowpart (mode, op1);
4389
4390  if (op2 == const0_rtx)
4391    op2 = CONST0_RTX (mode);
4392  else
4393    op2 = gen_lowpart (mode, op2);
4394
4395  emit_insn ((*gen) (op0, op1, op2));
4396}
4397
4398/* A subroutine of the atomic operation splitters.  Jump to LABEL if
4399   COND is true.  Mark the jump as unlikely to be taken.  */
4400
4401static void
4402emit_unlikely_jump (rtx cond, rtx label)
4403{
4404  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4405  rtx x;
4406
4407  x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4408  x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4409  add_int_reg_note (x, REG_BR_PROB, very_unlikely);
4410}
4411
4412/* A subroutine of the atomic operation splitters.  Emit a load-locked
4413   instruction in MODE.  */
4414
4415static void
4416emit_load_locked (machine_mode mode, rtx reg, rtx mem)
4417{
4418  rtx (*fn) (rtx, rtx) = NULL;
4419  if (mode == SImode)
4420    fn = gen_load_locked_si;
4421  else if (mode == DImode)
4422    fn = gen_load_locked_di;
4423  emit_insn (fn (reg, mem));
4424}
4425
4426/* A subroutine of the atomic operation splitters.  Emit a store-conditional
4427   instruction in MODE.  */
4428
4429static void
4430emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
4431{
4432  rtx (*fn) (rtx, rtx, rtx) = NULL;
4433  if (mode == SImode)
4434    fn = gen_store_conditional_si;
4435  else if (mode == DImode)
4436    fn = gen_store_conditional_di;
4437  emit_insn (fn (res, mem, val));
4438}
4439
4440/* Subroutines of the atomic operation splitters.  Emit barriers
4441   as needed for the memory MODEL.  */
4442
4443static void
4444alpha_pre_atomic_barrier (enum memmodel model)
4445{
4446  if (need_atomic_barrier_p (model, true))
4447    emit_insn (gen_memory_barrier ());
4448}
4449
4450static void
4451alpha_post_atomic_barrier (enum memmodel model)
4452{
4453  if (need_atomic_barrier_p (model, false))
4454    emit_insn (gen_memory_barrier ());
4455}
4456
4457/* A subroutine of the atomic operation splitters.  Emit an insxl
4458   instruction in MODE.  */
4459
4460static rtx
4461emit_insxl (machine_mode mode, rtx op1, rtx op2)
4462{
4463  rtx ret = gen_reg_rtx (DImode);
4464  rtx (*fn) (rtx, rtx, rtx);
4465
4466  switch (mode)
4467    {
4468    case QImode:
4469      fn = gen_insbl;
4470      break;
4471    case HImode:
4472      fn = gen_inswl;
4473      break;
4474    case SImode:
4475      fn = gen_insll;
4476      break;
4477    case DImode:
4478      fn = gen_insql;
4479      break;
4480    default:
4481      gcc_unreachable ();
4482    }
4483
4484  op1 = force_reg (mode, op1);
4485  emit_insn (fn (ret, op1, op2));
4486
4487  return ret;
4488}
4489
4490/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
4491   to perform.  MEM is the memory on which to operate.  VAL is the second
4492   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  SCRATCH is
4494   a scratch register.  */
4495
4496void
4497alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4498		       rtx after, rtx scratch, enum memmodel model)
4499{
4500  machine_mode mode = GET_MODE (mem);
4501  rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4502
4503  alpha_pre_atomic_barrier (model);
4504
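  /* Emit the top of the load-locked/store-conditional retry loop; the
     unlikely branch below returns here if the store-conditional fails.  */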
4505  label = gen_label_rtx ();
4506  emit_label (label);
4507  label = gen_rtx_LABEL_REF (DImode, label);
4508
4509  if (before == NULL)
4510    before = scratch;
4511  emit_load_locked (mode, before, mem);
4512
4513  if (code == NOT)
4514    {
4515      x = gen_rtx_AND (mode, before, val);
4516      emit_insn (gen_rtx_SET (VOIDmode, val, x));
4517
4518      x = gen_rtx_NOT (mode, val);
4519    }
4520  else
4521    x = gen_rtx_fmt_ee (code, mode, before, val);
4522  if (after)
4523    emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4524  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4525
4526  emit_store_conditional (mode, cond, mem, scratch);
4527
4528  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4529  emit_unlikely_jump (x, label);
4530
4531  alpha_post_atomic_barrier (model);
4532}
4533
4534/* Expand a compare and swap operation.  */
4535
4536void
4537alpha_split_compare_and_swap (rtx operands[])
4538{
4539  rtx cond, retval, mem, oldval, newval;
4540  bool is_weak;
4541  enum memmodel mod_s, mod_f;
4542  machine_mode mode;
4543  rtx label1, label2, x;
4544
4545  cond = operands[0];
4546  retval = operands[1];
4547  mem = operands[2];
4548  oldval = operands[3];
4549  newval = operands[4];
4550  is_weak = (operands[5] != const0_rtx);
4551  mod_s = memmodel_from_int (INTVAL (operands[6]));
4552  mod_f = memmodel_from_int (INTVAL (operands[7]));
4553  mode = GET_MODE (mem);
4554
4555  alpha_pre_atomic_barrier (mod_s);
4556
4557  label1 = NULL_RTX;
4558  if (!is_weak)
4559    {
4560      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4561      emit_label (XEXP (label1, 0));
4562    }
4563  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4564
4565  emit_load_locked (mode, retval, mem);
4566
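  /* Compare the value just loaded against OLDVAL; if they differ, COND is
     zero and we branch to the failure exit without storing.  */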
4567  x = gen_lowpart (DImode, retval);
4568  if (oldval == const0_rtx)
4569    {
4570      emit_move_insn (cond, const0_rtx);
4571      x = gen_rtx_NE (DImode, x, const0_rtx);
4572    }
4573  else
4574    {
4575      x = gen_rtx_EQ (DImode, x, oldval);
4576      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4577      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4578    }
4579  emit_unlikely_jump (x, label2);
4580
4581  emit_move_insn (cond, newval);
4582  emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4583
4584  if (!is_weak)
4585    {
4586      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4587      emit_unlikely_jump (x, label1);
4588    }
4589
4590  if (!is_mm_relaxed (mod_f))
4591    emit_label (XEXP (label2, 0));
4592
4593  alpha_post_atomic_barrier (mod_s);
4594
4595  if (is_mm_relaxed (mod_f))
4596    emit_label (XEXP (label2, 0));
4597}
4598
4599void
4600alpha_expand_compare_and_swap_12 (rtx operands[])
4601{
4602  rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4603  machine_mode mode;
4604  rtx addr, align, wdst;
4605  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4606
4607  cond = operands[0];
4608  dst = operands[1];
4609  mem = operands[2];
4610  oldval = operands[3];
4611  newval = operands[4];
4612  is_weak = operands[5];
4613  mod_s = operands[6];
4614  mod_f = operands[7];
4615  mode = GET_MODE (mem);
4616
4617  /* We forced the address into a register via mem_noofs_operand.  */
4618  addr = XEXP (mem, 0);
4619  gcc_assert (register_operand (addr, DImode));
4620
4621  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4622			       NULL_RTX, 1, OPTAB_DIRECT);
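  /* ALIGN is the address of the aligned quadword containing MEM.  */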
4623
4624  oldval = convert_modes (DImode, mode, oldval, 1);
4625
4626  if (newval != const0_rtx)
4627    newval = emit_insxl (mode, newval, addr);
4628
4629  wdst = gen_reg_rtx (DImode);
4630  if (mode == QImode)
4631    gen = gen_atomic_compare_and_swapqi_1;
4632  else
4633    gen = gen_atomic_compare_and_swaphi_1;
4634  emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4635		  is_weak, mod_s, mod_f));
4636
4637  emit_move_insn (dst, gen_lowpart (mode, wdst));
4638}
4639
4640void
4641alpha_split_compare_and_swap_12 (rtx operands[])
4642{
4643  rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4644  machine_mode mode;
4645  bool is_weak;
4646  enum memmodel mod_s, mod_f;
4647  rtx label1, label2, mem, addr, width, mask, x;
4648
4649  cond = operands[0];
4650  dest = operands[1];
4651  orig_mem = operands[2];
4652  oldval = operands[3];
4653  newval = operands[4];
4654  align = operands[5];
4655  is_weak = (operands[6] != const0_rtx);
4656  mod_s = memmodel_from_int (INTVAL (operands[7]));
4657  mod_f = memmodel_from_int (INTVAL (operands[8]));
4658  scratch = operands[9];
4659  mode = GET_MODE (orig_mem);
4660  addr = XEXP (orig_mem, 0);
4661
4662  mem = gen_rtx_MEM (DImode, align);
4663  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4664  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4665    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4666
4667  alpha_pre_atomic_barrier (mod_s);
4668
4669  label1 = NULL_RTX;
4670  if (!is_weak)
4671    {
4672      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4673      emit_label (XEXP (label1, 0));
4674    }
4675  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4676
4677  emit_load_locked (DImode, scratch, mem);
4678
4679  width = GEN_INT (GET_MODE_BITSIZE (mode));
4680  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4681  emit_insn (gen_extxl (dest, scratch, width, addr));
4682
4683  if (oldval == const0_rtx)
4684    {
4685      emit_move_insn (cond, const0_rtx);
4686      x = gen_rtx_NE (DImode, dest, const0_rtx);
4687    }
4688  else
4689    {
4690      x = gen_rtx_EQ (DImode, dest, oldval);
4691      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4692      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4693    }
4694  emit_unlikely_jump (x, label2);
4695
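  /* Clear the bytes being replaced within the quadword, then OR in the new
     value and attempt the store-conditional.  */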
4696  emit_insn (gen_mskxl (cond, scratch, mask, addr));
4697
4698  if (newval != const0_rtx)
4699    emit_insn (gen_iordi3 (cond, cond, newval));
4700
4701  emit_store_conditional (DImode, cond, mem, cond);
4702
4703  if (!is_weak)
4704    {
4705      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4706      emit_unlikely_jump (x, label1);
4707    }
4708
4709  if (!is_mm_relaxed (mod_f))
4710    emit_label (XEXP (label2, 0));
4711
4712  alpha_post_atomic_barrier (mod_s);
4713
4714  if (is_mm_relaxed (mod_f))
4715    emit_label (XEXP (label2, 0));
4716}
4717
4718/* Expand an atomic exchange operation.  */
4719
4720void
4721alpha_split_atomic_exchange (rtx operands[])
4722{
4723  rtx retval, mem, val, scratch;
4724  enum memmodel model;
4725  machine_mode mode;
4726  rtx label, x, cond;
4727
4728  retval = operands[0];
4729  mem = operands[1];
4730  val = operands[2];
4731  model = (enum memmodel) INTVAL (operands[3]);
4732  scratch = operands[4];
4733  mode = GET_MODE (mem);
4734  cond = gen_lowpart (DImode, scratch);
4735
4736  alpha_pre_atomic_barrier (model);
4737
4738  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4739  emit_label (XEXP (label, 0));
4740
4741  emit_load_locked (mode, retval, mem);
4742  emit_move_insn (scratch, val);
4743  emit_store_conditional (mode, cond, mem, scratch);
4744
4745  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4746  emit_unlikely_jump (x, label);
4747
4748  alpha_post_atomic_barrier (model);
4749}
4750
4751void
4752alpha_expand_atomic_exchange_12 (rtx operands[])
4753{
4754  rtx dst, mem, val, model;
4755  machine_mode mode;
4756  rtx addr, align, wdst;
4757  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4758
4759  dst = operands[0];
4760  mem = operands[1];
4761  val = operands[2];
4762  model = operands[3];
4763  mode = GET_MODE (mem);
4764
4765  /* We forced the address into a register via mem_noofs_operand.  */
4766  addr = XEXP (mem, 0);
4767  gcc_assert (register_operand (addr, DImode));
4768
4769  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4770			       NULL_RTX, 1, OPTAB_DIRECT);
4771
4772  /* Insert val into the correct byte location within the word.  */
4773  if (val != const0_rtx)
4774    val = emit_insxl (mode, val, addr);
4775
4776  wdst = gen_reg_rtx (DImode);
4777  if (mode == QImode)
4778    gen = gen_atomic_exchangeqi_1;
4779  else
4780    gen = gen_atomic_exchangehi_1;
4781  emit_insn (gen (wdst, mem, val, align, model));
4782
4783  emit_move_insn (dst, gen_lowpart (mode, wdst));
4784}
4785
4786void
4787alpha_split_atomic_exchange_12 (rtx operands[])
4788{
4789  rtx dest, orig_mem, addr, val, align, scratch;
4790  rtx label, mem, width, mask, x;
4791  machine_mode mode;
4792  enum memmodel model;
4793
4794  dest = operands[0];
4795  orig_mem = operands[1];
4796  val = operands[2];
4797  align = operands[3];
4798  model = (enum memmodel) INTVAL (operands[4]);
4799  scratch = operands[5];
4800  mode = GET_MODE (orig_mem);
4801  addr = XEXP (orig_mem, 0);
4802
4803  mem = gen_rtx_MEM (DImode, align);
4804  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4805  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4806    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4807
4808  alpha_pre_atomic_barrier (model);
4809
4810  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4811  emit_label (XEXP (label, 0));
4812
4813  emit_load_locked (DImode, scratch, mem);
4814
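  /* Extract the old narrow value as the result, then splice VAL into the
     quadword and attempt the store-conditional.  */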
4815  width = GEN_INT (GET_MODE_BITSIZE (mode));
4816  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4817  emit_insn (gen_extxl (dest, scratch, width, addr));
4818  emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4819  if (val != const0_rtx)
4820    emit_insn (gen_iordi3 (scratch, scratch, val));
4821
4822  emit_store_conditional (DImode, scratch, mem, scratch);
4823
4824  x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4825  emit_unlikely_jump (x, label);
4826
4827  alpha_post_atomic_barrier (model);
4828}
4829
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   the dependency LINK of INSN on DEP_INSN.  COST is the current cost.  */
4832
4833static int
4834alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4835{
4836  enum attr_type dep_insn_type;
4837
4838  /* If the dependence is an anti-dependence, there is no cost.  For an
4839     output dependence, there is sometimes a cost, but it doesn't seem
4840     worth handling those few cases.  */
4841  if (REG_NOTE_KIND (link) != 0)
4842    return cost;
4843
4844  /* If we can't recognize the insns, we can't really do anything.  */
4845  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4846    return cost;
4847
4848  dep_insn_type = get_attr_type (dep_insn);
4849
4850  /* Bring in the user-defined memory latency.  */
4851  if (dep_insn_type == TYPE_ILD
4852      || dep_insn_type == TYPE_FLD
4853      || dep_insn_type == TYPE_LDSYM)
4854    cost += alpha_memory_latency-1;
4855
4856  /* Everything else handled in DFA bypasses now.  */
4857
4858  return cost;
4859}
4860
4861/* The number of instructions that can be issued per cycle.  */
4862
4863static int
4864alpha_issue_rate (void)
4865{
4866  return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4867}
4868
4869/* How many alternative schedules to try.  This should be as wide as the
4870   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.
4872
4873   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4874   alternative schedules.  For EV5, we can choose between E0/E1 and
4875   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */
4876
4877static int
4878alpha_multipass_dfa_lookahead (void)
4879{
4880  return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4881}
4882
4883/* Machine-specific function data.  */
4884
4885struct GTY(()) alpha_links;
4886
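/* Hash map traits for tables keyed by C strings, comparing keys by their
   contents rather than by pointer equality.  */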
4887struct string_traits : default_hashmap_traits
4888{
4889  static bool equal_keys (const char *const &a, const char *const &b)
4890  {
4891    return strcmp (a, b) == 0;
4892  }
4893};
4894
4895struct GTY(()) machine_function
4896{
4897  /* For flag_reorder_blocks_and_partition.  */
4898  rtx gp_save_rtx;
4899
4900  /* For VMS condition handlers.  */
4901  bool uses_condition_handler;
4902
4903  /* Linkage entries.  */
4904  hash_map<const char *, alpha_links *, string_traits> *links;
4905};
4906
4907/* How to allocate a 'struct machine_function'.  */
4908
4909static struct machine_function *
4910alpha_init_machine_status (void)
4911{
4912  return ggc_cleared_alloc<machine_function> ();
4913}
4914
4915/* Support for frame based VMS condition handlers.  */
4916
4917/* A VMS condition handler may be established for a function with a call to
4918   __builtin_establish_vms_condition_handler, and cancelled with a call to
4919   __builtin_revert_vms_condition_handler.
4920
4921   The VMS Condition Handling Facility knows about the existence of a handler
   from the procedure descriptor .handler field.  Like the VMS native
   compilers, we store the user-specified handler's address at a fixed
   location in the
4924   stack frame and point the procedure descriptor at a common wrapper which
4925   fetches the real handler's address and issues an indirect call.
4926
4927   The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4928
4929   We force the procedure kind to PT_STACK, and the fixed frame location is
4930   fp+8, just before the register save area. We use the handler_data field in
4931   the procedure descriptor to state the fp offset at which the installed
4932   handler address can be found.  */
4933
4934#define VMS_COND_HANDLER_FP_OFFSET 8
4935
4936/* Expand code to store the currently installed user VMS condition handler
4937   into TARGET and install HANDLER as the new condition handler.  */
4938
4939void
4940alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4941{
4942  rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4943					    VMS_COND_HANDLER_FP_OFFSET);
4944
4945  rtx handler_slot
4946    = gen_rtx_MEM (DImode, handler_slot_address);
4947
4948  emit_move_insn (target, handler_slot);
4949  emit_move_insn (handler_slot, handler);
4950
4951  /* Notify the start/prologue/epilogue emitters that the condition handler
4952     slot is needed.  In addition to reserving the slot space, this will force
     the procedure kind to PT_STACK and thus ensure that the
     hard_frame_pointer_rtx use above is correct.  */
4955  cfun->machine->uses_condition_handler = true;
4956}
4957
4958/* Expand code to store the current VMS condition handler into TARGET and
4959   nullify it.  */
4960
4961void
4962alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4963{
  /* We implement this by establishing a null condition handler, with the tiny
     side effect of setting uses_condition_handler.  This is slightly
     pessimistic if no builtin_establish call is ever issued, but that is
     harmless and is not expected to happen anyway.  */
4968
4969  alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4970}
4971
4972/* Functions to save and restore alpha_return_addr_rtx.  */
4973
4974/* Start the ball rolling with RETURN_ADDR_RTX.  */
4975
4976rtx
4977alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4978{
4979  if (count != 0)
4980    return const0_rtx;
4981
4982  return get_hard_reg_initial_val (Pmode, REG_RA);
4983}
4984
4985/* Return or create a memory slot containing the gp value for the current
4986   function.  Needed only if TARGET_LD_BUGGY_LDGP.  */
4987
4988rtx
4989alpha_gp_save_rtx (void)
4990{
4991  rtx_insn *seq;
4992  rtx m = cfun->machine->gp_save_rtx;
4993
4994  if (m == NULL)
4995    {
4996      start_sequence ();
4997
4998      m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4999      m = validize_mem (m);
5000      emit_move_insn (m, pic_offset_table_rtx);
5001
5002      seq = get_insns ();
5003      end_sequence ();
5004
5005      /* We used to simply emit the sequence after entry_of_function.
5006	 However this breaks the CFG if the first instruction in the
5007	 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
5008	 label.  Emit the sequence properly on the edge.  We are only
5009	 invoked from dw2_build_landing_pads and finish_eh_generation
5010	 will call commit_edge_insertions thanks to a kludge.  */
5011      insert_insn_on_edge (seq,
5012			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
5013
5014      cfun->machine->gp_save_rtx = m;
5015    }
5016
5017  return m;
5018}
5019
5020static void
5021alpha_instantiate_decls (void)
5022{
5023  if (cfun->machine->gp_save_rtx != NULL_RTX)
5024    instantiate_decl_rtl (cfun->machine->gp_save_rtx);
5025}
5026
5027static int
5028alpha_ra_ever_killed (void)
5029{
5030  rtx_insn *top;
5031
5032  if (!has_hard_reg_initial_val (Pmode, REG_RA))
5033    return (int)df_regs_ever_live_p (REG_RA);
5034
5035  push_topmost_sequence ();
5036  top = get_insns ();
5037  pop_topmost_sequence ();
5038
5039  return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
5040}
5041
5042
5043/* Return the trap mode suffix applicable to the current
5044   instruction, or NULL.  */
5045
5046static const char *
5047get_trap_mode_suffix (void)
5048{
5049  enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
5050
5051  switch (s)
5052    {
5053    case TRAP_SUFFIX_NONE:
5054      return NULL;
5055
5056    case TRAP_SUFFIX_SU:
5057      if (alpha_fptm >= ALPHA_FPTM_SU)
5058	return "su";
5059      return NULL;
5060
5061    case TRAP_SUFFIX_SUI:
5062      if (alpha_fptm >= ALPHA_FPTM_SUI)
5063	return "sui";
5064      return NULL;
5065
5066    case TRAP_SUFFIX_V_SV:
5067      switch (alpha_fptm)
5068	{
5069	case ALPHA_FPTM_N:
5070	  return NULL;
5071	case ALPHA_FPTM_U:
5072	  return "v";
5073	case ALPHA_FPTM_SU:
5074	case ALPHA_FPTM_SUI:
5075	  return "sv";
5076	default:
5077	  gcc_unreachable ();
5078	}
5079
5080    case TRAP_SUFFIX_V_SV_SVI:
5081      switch (alpha_fptm)
5082	{
5083	case ALPHA_FPTM_N:
5084	  return NULL;
5085	case ALPHA_FPTM_U:
5086	  return "v";
5087	case ALPHA_FPTM_SU:
5088	  return "sv";
5089	case ALPHA_FPTM_SUI:
5090	  return "svi";
5091	default:
5092	  gcc_unreachable ();
5093	}
5094      break;
5095
5096    case TRAP_SUFFIX_U_SU_SUI:
5097      switch (alpha_fptm)
5098	{
5099	case ALPHA_FPTM_N:
5100	  return NULL;
5101	case ALPHA_FPTM_U:
5102	  return "u";
5103	case ALPHA_FPTM_SU:
5104	  return "su";
5105	case ALPHA_FPTM_SUI:
5106	  return "sui";
5107	default:
5108	  gcc_unreachable ();
5109	}
5110      break;
5111
5112    default:
5113      gcc_unreachable ();
5114    }
5115  gcc_unreachable ();
5116}
5117
5118/* Return the rounding mode suffix applicable to the current
5119   instruction, or NULL.  */
5120
5121static const char *
5122get_round_mode_suffix (void)
5123{
5124  enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5125
5126  switch (s)
5127    {
5128    case ROUND_SUFFIX_NONE:
5129      return NULL;
5130    case ROUND_SUFFIX_NORMAL:
5131      switch (alpha_fprm)
5132	{
5133	case ALPHA_FPRM_NORM:
5134	  return NULL;
5135	case ALPHA_FPRM_MINF:
5136	  return "m";
5137	case ALPHA_FPRM_CHOP:
5138	  return "c";
5139	case ALPHA_FPRM_DYN:
5140	  return "d";
5141	default:
5142	  gcc_unreachable ();
5143	}
5144      break;
5145
5146    case ROUND_SUFFIX_C:
5147      return "c";
5148
5149    default:
5150      gcc_unreachable ();
5151    }
5152  gcc_unreachable ();
5153}
5154
5155/* Print an operand.  Recognize special options, documented below.  */
5156
5157void
5158print_operand (FILE *file, rtx x, int code)
5159{
5160  int i;
5161
5162  switch (code)
5163    {
5164    case '~':
5165      /* Print the assembler name of the current function.  */
5166      assemble_name (file, alpha_fnname);
5167      break;
5168
5169    case '&':
5170      if (const char *name = get_some_local_dynamic_name ())
5171	assemble_name (file, name);
5172      else
5173	output_operand_lossage ("'%%&' used without any "
5174				"local dynamic TLS references");
5175      break;
5176
5177    case '/':
5178      {
5179	const char *trap = get_trap_mode_suffix ();
5180	const char *round = get_round_mode_suffix ();
5181
5182	if (trap || round)
5183	  fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5184	break;
5185      }
5186
5187    case ',':
5188      /* Generates single precision instruction suffix.  */
5189      fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5190      break;
5191
5192    case '-':
5193      /* Generates double precision instruction suffix.  */
5194      fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5195      break;
5196
5197    case '#':
5198      if (alpha_this_literal_sequence_number == 0)
5199	alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5200      fprintf (file, "%d", alpha_this_literal_sequence_number);
5201      break;
5202
5203    case '*':
5204      if (alpha_this_gpdisp_sequence_number == 0)
5205	alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5206      fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5207      break;
5208
5209    case 'H':
5210      if (GET_CODE (x) == HIGH)
5211	output_addr_const (file, XEXP (x, 0));
5212      else
5213	output_operand_lossage ("invalid %%H value");
5214      break;
5215
5216    case 'J':
5217      {
5218	const char *lituse;
5219
5220        if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5221	  {
5222	    x = XVECEXP (x, 0, 0);
5223	    lituse = "lituse_tlsgd";
5224	  }
5225	else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5226	  {
5227	    x = XVECEXP (x, 0, 0);
5228	    lituse = "lituse_tlsldm";
5229	  }
5230	else if (CONST_INT_P (x))
5231	  lituse = "lituse_jsr";
5232	else
5233	  {
5234	    output_operand_lossage ("invalid %%J value");
5235	    break;
5236	  }
5237
5238	if (x != const0_rtx)
5239	  fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5240      }
5241      break;
5242
5243    case 'j':
5244      {
5245	const char *lituse;
5246
5247#ifdef HAVE_AS_JSRDIRECT_RELOCS
5248	lituse = "lituse_jsrdirect";
5249#else
5250	lituse = "lituse_jsr";
5251#endif
5252
5253	gcc_assert (INTVAL (x) != 0);
5254	fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5255      }
5256      break;
5257    case 'r':
5258      /* If this operand is the constant zero, write it as "$31".  */
5259      if (REG_P (x))
5260	fprintf (file, "%s", reg_names[REGNO (x)]);
5261      else if (x == CONST0_RTX (GET_MODE (x)))
5262	fprintf (file, "$31");
5263      else
5264	output_operand_lossage ("invalid %%r value");
5265      break;
5266
5267    case 'R':
5268      /* Similar, but for floating-point.  */
5269      if (REG_P (x))
5270	fprintf (file, "%s", reg_names[REGNO (x)]);
5271      else if (x == CONST0_RTX (GET_MODE (x)))
5272	fprintf (file, "$f31");
5273      else
5274	output_operand_lossage ("invalid %%R value");
5275      break;
5276
5277    case 'N':
5278      /* Write the 1's complement of a constant.  */
5279      if (!CONST_INT_P (x))
5280	output_operand_lossage ("invalid %%N value");
5281
5282      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5283      break;
5284
5285    case 'P':
5286      /* Write 1 << C, for a constant C.  */
5287      if (!CONST_INT_P (x))
5288	output_operand_lossage ("invalid %%P value");
5289
5290      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5291      break;
5292
5293    case 'h':
5294      /* Write the high-order 16 bits of a constant, sign-extended.  */
5295      if (!CONST_INT_P (x))
5296	output_operand_lossage ("invalid %%h value");
5297
5298      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5299      break;
5300
5301    case 'L':
5302      /* Write the low-order 16 bits of a constant, sign-extended.  */
5303      if (!CONST_INT_P (x))
5304	output_operand_lossage ("invalid %%L value");
5305
5306      fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5307	       (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5308      break;
5309
5310    case 'm':
5311      /* Write mask for ZAP insn.  */
5312      if (GET_CODE (x) == CONST_DOUBLE)
5313	{
5314	  HOST_WIDE_INT mask = 0;
5315	  HOST_WIDE_INT value;
5316
5317	  value = CONST_DOUBLE_LOW (x);
5318	  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5319	       i++, value >>= 8)
5320	    if (value & 0xff)
5321	      mask |= (1 << i);
5322
5323	  value = CONST_DOUBLE_HIGH (x);
5324	  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5325	       i++, value >>= 8)
5326	    if (value & 0xff)
5327	      mask |= (1 << (i + sizeof (int)));
5328
5329	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5330	}
5331
5332      else if (CONST_INT_P (x))
5333	{
5334	  HOST_WIDE_INT mask = 0, value = INTVAL (x);
5335
5336	  for (i = 0; i < 8; i++, value >>= 8)
5337	    if (value & 0xff)
5338	      mask |= (1 << i);
5339
5340	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5341	}
5342      else
5343	output_operand_lossage ("invalid %%m value");
5344      break;
5345
5346    case 'M':
5347      /* 'b', 'w', 'l', or 'q' as the value of the constant.  */
5348      if (!CONST_INT_P (x)
5349	  || (INTVAL (x) != 8 && INTVAL (x) != 16
5350	      && INTVAL (x) != 32 && INTVAL (x) != 64))
5351	output_operand_lossage ("invalid %%M value");
5352
5353      fprintf (file, "%s",
5354	       (INTVAL (x) == 8 ? "b"
5355		: INTVAL (x) == 16 ? "w"
5356		: INTVAL (x) == 32 ? "l"
5357		: "q"));
5358      break;
5359
5360    case 'U':
5361      /* Similar, except do it from the mask.  */
5362      if (CONST_INT_P (x))
5363	{
5364	  HOST_WIDE_INT value = INTVAL (x);
5365
5366	  if (value == 0xff)
5367	    {
5368	      fputc ('b', file);
5369	      break;
5370	    }
5371	  if (value == 0xffff)
5372	    {
5373	      fputc ('w', file);
5374	      break;
5375	    }
5376	  if (value == 0xffffffff)
5377	    {
5378	      fputc ('l', file);
5379	      break;
5380	    }
5381	  if (value == -1)
5382	    {
5383	      fputc ('q', file);
5384	      break;
5385	    }
5386	}
5387      else if (HOST_BITS_PER_WIDE_INT == 32
5388	       && GET_CODE (x) == CONST_DOUBLE
5389	       && CONST_DOUBLE_LOW (x) == 0xffffffff
5390	       && CONST_DOUBLE_HIGH (x) == 0)
5391	{
5392	  fputc ('l', file);
5393	  break;
5394	}
5395      output_operand_lossage ("invalid %%U value");
5396      break;
5397
5398    case 's':
5399      /* Write the constant value divided by 8.  */
5400      if (!CONST_INT_P (x)
5401	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5402	  || (INTVAL (x) & 7) != 0)
5403	output_operand_lossage ("invalid %%s value");
5404
5405      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5406      break;
5407
    case 'S':
      /* Same, except compute (64 - c) / 8.  */

      if (!CONST_INT_P (x)
	  || (unsigned HOST_WIDE_INT) INTVAL (x) > 64
	  || (INTVAL (x) & 7) != 0)
	output_operand_lossage ("invalid %%S value");
5415
5416      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5417      break;
5418
5419    case 'C': case 'D': case 'c': case 'd':
5420      /* Write out comparison name.  */
5421      {
5422	enum rtx_code c = GET_CODE (x);
5423
5424        if (!COMPARISON_P (x))
5425	  output_operand_lossage ("invalid %%C value");
5426
5427	else if (code == 'D')
5428	  c = reverse_condition (c);
5429	else if (code == 'c')
5430	  c = swap_condition (c);
5431	else if (code == 'd')
5432	  c = swap_condition (reverse_condition (c));
5433
5434        if (c == LEU)
5435	  fprintf (file, "ule");
5436        else if (c == LTU)
5437	  fprintf (file, "ult");
5438	else if (c == UNORDERED)
5439	  fprintf (file, "un");
5440        else
5441	  fprintf (file, "%s", GET_RTX_NAME (c));
5442      }
5443      break;
5444
5445    case 'E':
5446      /* Write the divide or modulus operator.  */
5447      switch (GET_CODE (x))
5448	{
5449	case DIV:
5450	  fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5451	  break;
5452	case UDIV:
5453	  fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5454	  break;
5455	case MOD:
5456	  fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5457	  break;
5458	case UMOD:
5459	  fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5460	  break;
5461	default:
5462	  output_operand_lossage ("invalid %%E value");
5463	  break;
5464	}
5465      break;
5466
5467    case 'A':
5468      /* Write "_u" for unaligned access.  */
5469      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5470	fprintf (file, "_u");
5471      break;
5472
5473    case 0:
5474      if (REG_P (x))
5475	fprintf (file, "%s", reg_names[REGNO (x)]);
5476      else if (MEM_P (x))
5477	output_address (XEXP (x, 0));
5478      else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5479	{
5480	  switch (XINT (XEXP (x, 0), 1))
5481	    {
5482	    case UNSPEC_DTPREL:
5483	    case UNSPEC_TPREL:
5484	      output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5485	      break;
5486	    default:
5487	      output_operand_lossage ("unknown relocation unspec");
5488	      break;
5489	    }
5490	}
5491      else
5492	output_addr_const (file, x);
5493      break;
5494
5495    default:
5496      output_operand_lossage ("invalid %%xn code");
5497    }
5498}
5499
5500void
5501print_operand_address (FILE *file, rtx addr)
5502{
5503  int basereg = 31;
5504  HOST_WIDE_INT offset = 0;
5505
5506  if (GET_CODE (addr) == AND)
5507    addr = XEXP (addr, 0);
5508
5509  if (GET_CODE (addr) == PLUS
5510      && CONST_INT_P (XEXP (addr, 1)))
5511    {
5512      offset = INTVAL (XEXP (addr, 1));
5513      addr = XEXP (addr, 0);
5514    }
5515
5516  if (GET_CODE (addr) == LO_SUM)
5517    {
5518      const char *reloc16, *reloclo;
5519      rtx op1 = XEXP (addr, 1);
5520
5521      if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5522	{
5523	  op1 = XEXP (op1, 0);
5524	  switch (XINT (op1, 1))
5525	    {
5526	    case UNSPEC_DTPREL:
5527	      reloc16 = NULL;
5528	      reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5529	      break;
5530	    case UNSPEC_TPREL:
5531	      reloc16 = NULL;
5532	      reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5533	      break;
5534	    default:
5535	      output_operand_lossage ("unknown relocation unspec");
5536	      return;
5537	    }
5538
5539	  output_addr_const (file, XVECEXP (op1, 0, 0));
5540	}
5541      else
5542	{
5543	  reloc16 = "gprel";
5544	  reloclo = "gprellow";
5545	  output_addr_const (file, op1);
5546	}
5547
5548      if (offset)
5549	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5550
5551      addr = XEXP (addr, 0);
5552      switch (GET_CODE (addr))
5553	{
5554	case REG:
5555	  basereg = REGNO (addr);
5556	  break;
5557
5558	case SUBREG:
5559	  basereg = subreg_regno (addr);
5560	  break;
5561
5562	default:
5563	  gcc_unreachable ();
5564	}
5565
5566      fprintf (file, "($%d)\t\t!%s", basereg,
5567	       (basereg == 29 ? reloc16 : reloclo));
5568      return;
5569    }
5570
5571  switch (GET_CODE (addr))
5572    {
5573    case REG:
5574      basereg = REGNO (addr);
5575      break;
5576
5577    case SUBREG:
5578      basereg = subreg_regno (addr);
5579      break;
5580
5581    case CONST_INT:
5582      offset = INTVAL (addr);
5583      break;
5584
5585    case SYMBOL_REF:
5586      gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5587      fprintf (file, "%s", XSTR (addr, 0));
5588      return;
5589
5590    case CONST:
5591      gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5592      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5593		  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5594      fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5595	       XSTR (XEXP (XEXP (addr, 0), 0), 0),
5596	       INTVAL (XEXP (XEXP (addr, 0), 1)));
5597      return;
5598
5599    default:
5600      output_operand_lossage ("invalid operand address");
5601      return;
5602    }
5603
5604  fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5605}
5606
5607/* Emit RTL insns to initialize the variable parts of a trampoline at
5608   M_TRAMP.  FNDECL is target function's decl.  CHAIN_VALUE is an rtx
5609   for the static chain value for the function.  */
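/* The code below fills four quadwords: offsets 0 and 8 receive the two
   code words (or, on VMS, the bounded procedure descriptor and the "..tr"
   transfer address), offset 16 receives the target function address and
   offset 24 the static chain, matching the ldq offsets used by the OSF
   instruction sequence.  */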
5610
5611static void
5612alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5613{
5614  rtx fnaddr, mem, word1, word2;
5615
5616  fnaddr = XEXP (DECL_RTL (fndecl), 0);
5617
5618#ifdef POINTERS_EXTEND_UNSIGNED
5619  fnaddr = convert_memory_address (Pmode, fnaddr);
5620  chain_value = convert_memory_address (Pmode, chain_value);
5621#endif
5622
5623  if (TARGET_ABI_OPEN_VMS)
5624    {
5625      const char *fnname;
5626      char *trname;
5627
5628      /* Construct the name of the trampoline entry point.  */
5629      fnname = XSTR (fnaddr, 0);
5630      trname = (char *) alloca (strlen (fnname) + 5);
5631      strcpy (trname, fnname);
5632      strcat (trname, "..tr");
5633      fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5634      word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5635
5636      /* The trampoline (or "bounded") procedure descriptor is constructed
5637	 from the function's procedure descriptor with certain fields zeroed
5638	 per the VMS calling standard.  This is stored in the first quadword.  */
5639      word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5640      word1 = expand_and (DImode, word1,
5641			  GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5642			  NULL);
5643    }
5644  else
5645    {
5646      /* These 4 instructions are:
5647	    ldq $1,24($27)
5648	    ldq $27,16($27)
5649	    jmp $31,($27),0
5650	    nop
5651	 We don't bother setting the HINT field of the jump; the nop
5652	 is merely there for padding.  */
5653      word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5654      word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5655    }
5656
5657  /* Store the first two words, as computed above.  */
5658  mem = adjust_address (m_tramp, DImode, 0);
5659  emit_move_insn (mem, word1);
5660  mem = adjust_address (m_tramp, DImode, 8);
5661  emit_move_insn (mem, word2);
5662
5663  /* Store function address and static chain value.  */
5664  mem = adjust_address (m_tramp, Pmode, 16);
5665  emit_move_insn (mem, fnaddr);
5666  mem = adjust_address (m_tramp, Pmode, 24);
5667  emit_move_insn (mem, chain_value);
5668
5669  if (TARGET_ABI_OSF)
5670    {
5671      emit_insn (gen_imb ());
5672#ifdef HAVE_ENABLE_EXECUTE_STACK
5673      emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5674			 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5675#endif
5676    }
5677}
5678
5679/* Determine where to put an argument to a function.
5680   Value is zero to push the argument on the stack,
5681   or a hard register in which to store the argument.
5682
5683   MODE is the argument's machine mode.
5684   TYPE is the data type of the argument (as a tree).
5685    This is null for libcalls where that information may
5686    not be available.
5687   CUM is a variable of type CUMULATIVE_ARGS which gives info about
5688    the preceding args and about the function being called.
5689   NAMED is nonzero if this argument is a named parameter
5690    (otherwise it is an extra parameter matching an ellipsis).
5691
5692   On Alpha the first 6 words of args are normally in registers
5693   and the rest are pushed.  */
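/* For example, with no arguments consumed yet, an integral argument is
   passed in $16 (hard register 16), while a floating-point argument goes
   in $f16 (hard register 48) when FP registers are available.  */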
5694
5695static rtx
5696alpha_function_arg (cumulative_args_t cum_v, machine_mode mode,
5697		    const_tree type, bool named ATTRIBUTE_UNUSED)
5698{
5699  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5700  int basereg;
5701  int num_args;
5702
5703  /* Don't get confused and pass small structures in FP registers.  */
5704  if (type && AGGREGATE_TYPE_P (type))
5705    basereg = 16;
5706  else
5707    {
5708#ifdef ENABLE_CHECKING
5709      /* With alpha_split_complex_arg, we shouldn't see any raw complex
5710	 values here.  */
5711      gcc_assert (!COMPLEX_MODE_P (mode));
5712#endif
5713
5714      /* Set up defaults for FP operands passed in FP registers, and
5715	 integral operands passed in integer registers.  */
5716      if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5717	basereg = 32 + 16;
5718      else
5719	basereg = 16;
5720    }
5721
5722  /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5723     the two platforms, so we can't avoid conditional compilation.  */
5724#if TARGET_ABI_OPEN_VMS
5725    {
5726      if (mode == VOIDmode)
5727	return alpha_arg_info_reg_val (*cum);
5728
5729      num_args = cum->num_args;
5730      if (num_args >= 6
5731	  || targetm.calls.must_pass_in_stack (mode, type))
5732	return NULL_RTX;
5733    }
5734#elif TARGET_ABI_OSF
5735    {
5736      if (*cum >= 6)
5737	return NULL_RTX;
5738      num_args = *cum;
5739
5740      /* VOID is passed as a special flag for "last argument".  */
5741      if (type == void_type_node)
5742	basereg = 16;
5743      else if (targetm.calls.must_pass_in_stack (mode, type))
5744	return NULL_RTX;
5745    }
5746#else
5747#error Unhandled ABI
5748#endif
5749
5750  return gen_rtx_REG (mode, num_args + basereg);
5751}
5752
5753/* Update the data in CUM to advance over an argument
5754   of mode MODE and data type TYPE.
5755   (TYPE is null for libcalls where that information may not be available.)  */
5756
5757static void
5758alpha_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
5759			    const_tree type, bool named ATTRIBUTE_UNUSED)
5760{
5761  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5762  bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5763  int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5764
5765#if TARGET_ABI_OSF
5766  *cum += increment;
5767#else
5768  if (!onstack && cum->num_args < 6)
5769    cum->atypes[cum->num_args] = alpha_arg_type (mode);
5770  cum->num_args += increment;
5771#endif
5772}
5773
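/* Implement TARGET_ARG_PARTIAL_BYTES: return the number of bytes of an
   argument that are passed in registers when the argument straddles the
   boundary between the six argument-register slots and the stack.  */
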
5774static int
5775alpha_arg_partial_bytes (cumulative_args_t cum_v,
5776			 machine_mode mode ATTRIBUTE_UNUSED,
5777			 tree type ATTRIBUTE_UNUSED,
5778			 bool named ATTRIBUTE_UNUSED)
5779{
5780  int words = 0;
5781  CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5782
5783#if TARGET_ABI_OPEN_VMS
5784  if (cum->num_args < 6
5785      && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5786    words = 6 - cum->num_args;
5787#elif TARGET_ABI_OSF
5788  if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5789    words = 6 - *cum;
5790#else
5791#error Unhandled ABI
5792#endif
5793
5794  return words * UNITS_PER_WORD;
5795}
5796
5797
5798/* Return true if TYPE must be returned in memory, instead of in registers.  */
5799
5800static bool
5801alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5802{
5803  machine_mode mode = VOIDmode;
5804  int size;
5805
5806  if (type)
5807    {
5808      mode = TYPE_MODE (type);
5809
5810      /* All aggregates are returned in memory, except on OpenVMS where
5811	 records that fit 64 bits should be returned by immediate value
5812	 as required by section 3.8.7.1 of the OpenVMS Calling Standard.  */
5813      if (TARGET_ABI_OPEN_VMS
5814	  && TREE_CODE (type) != ARRAY_TYPE
5815	  && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5816	return false;
5817
5818      if (AGGREGATE_TYPE_P (type))
5819	return true;
5820    }
5821
5822  size = GET_MODE_SIZE (mode);
5823  switch (GET_MODE_CLASS (mode))
5824    {
5825    case MODE_VECTOR_FLOAT:
5826      /* Pass all float vectors in memory, like an aggregate.  */
5827      return true;
5828
5829    case MODE_COMPLEX_FLOAT:
5830      /* We judge complex floats on the size of their element,
5831	 not the size of the whole type.  */
5832      size = GET_MODE_UNIT_SIZE (mode);
5833      break;
5834
5835    case MODE_INT:
5836    case MODE_FLOAT:
5837    case MODE_COMPLEX_INT:
5838    case MODE_VECTOR_INT:
5839      break;
5840
5841    default:
5842      /* ??? We get called on all sorts of random stuff from
5843	 aggregate_value_p.  We must return something, but it's not
5844	 clear what's safe to return.  Pretend it's a struct, I
5845	 guess.  */
5846      return true;
5847    }
5848
5849  /* Otherwise types must fit in one register.  */
5850  return size > UNITS_PER_WORD;
5851}
5852
5853/* Return true if TYPE should be passed by invisible reference.  */
5854
5855static bool
5856alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5857			 machine_mode mode,
5858			 const_tree type ATTRIBUTE_UNUSED,
5859			 bool named ATTRIBUTE_UNUSED)
5860{
5861  return mode == TFmode || mode == TCmode;
5862}
5863
5864/* Define how to find the value returned by a function.  VALTYPE is the
5865   data type of the value (as a tree).  If the precise function being
5866   called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5867   MODE is set instead of VALTYPE for libcalls.
5868
5869   On Alpha the value is found in $0 for integer functions and
5870   $f0 for floating-point functions.  */
5871
5872rtx
5873function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5874		machine_mode mode)
5875{
5876  unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5877  enum mode_class mclass;
5878
5879  gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5880
5881  if (valtype)
5882    mode = TYPE_MODE (valtype);
5883
5884  mclass = GET_MODE_CLASS (mode);
5885  switch (mclass)
5886    {
5887    case MODE_INT:
5888      /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5889	 where we have them returning both SImode and DImode.  */
5890      if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5891        PROMOTE_MODE (mode, dummy, valtype);
5892      /* FALLTHRU */
5893
5894    case MODE_COMPLEX_INT:
5895    case MODE_VECTOR_INT:
5896      regnum = 0;
5897      break;
5898
5899    case MODE_FLOAT:
5900      regnum = 32;
5901      break;
5902
5903    case MODE_COMPLEX_FLOAT:
5904      {
5905	machine_mode cmode = GET_MODE_INNER (mode);
5906
5907	return gen_rtx_PARALLEL
5908	  (VOIDmode,
5909	   gen_rtvec (2,
5910		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5911				         const0_rtx),
5912		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5913				         GEN_INT (GET_MODE_SIZE (cmode)))));
5914      }
5915
5916    case MODE_RANDOM:
5917      /* We should only reach here for BLKmode on VMS.  */
5918      gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5919      regnum = 0;
5920      break;
5921
5922    default:
5923      gcc_unreachable ();
5924    }
5925
5926  return gen_rtx_REG (mode, regnum);
5927}
5928
5929/* TCmode complex values are passed by invisible reference.  We
5930   should not split these values.  */
5931
5932static bool
5933alpha_split_complex_arg (const_tree type)
5934{
5935  return TYPE_MODE (type) != TCmode;
5936}
5937
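/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  On OSF the type built below is
   roughly

     struct __va_list_tag { void *__base; int __offset; };

   plus a dummy int field to quiet alignment warnings; on VMS, va_list is
   a plain pointer.  */
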
5938static tree
5939alpha_build_builtin_va_list (void)
5940{
5941  tree base, ofs, space, record, type_decl;
5942
5943  if (TARGET_ABI_OPEN_VMS)
5944    return ptr_type_node;
5945
5946  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5947  type_decl = build_decl (BUILTINS_LOCATION,
5948			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
5949  TYPE_STUB_DECL (record) = type_decl;
5950  TYPE_NAME (record) = type_decl;
5951
5952  /* C++? SET_IS_AGGR_TYPE (record, 1); */
5953
5954  /* Dummy field to prevent alignment warnings.  */
5955  space = build_decl (BUILTINS_LOCATION,
5956		      FIELD_DECL, NULL_TREE, integer_type_node);
5957  DECL_FIELD_CONTEXT (space) = record;
5958  DECL_ARTIFICIAL (space) = 1;
5959  DECL_IGNORED_P (space) = 1;
5960
5961  ofs = build_decl (BUILTINS_LOCATION,
5962		    FIELD_DECL, get_identifier ("__offset"),
5963		    integer_type_node);
5964  DECL_FIELD_CONTEXT (ofs) = record;
5965  DECL_CHAIN (ofs) = space;
5966  /* ??? This is a hack, __offset is marked volatile to prevent
5967     DCE that confuses stdarg optimization and results in
5968     gcc.c-torture/execute/stdarg-1.c failure.  See PR 41089.  */
5969  TREE_THIS_VOLATILE (ofs) = 1;
5970
5971  base = build_decl (BUILTINS_LOCATION,
5972		     FIELD_DECL, get_identifier ("__base"),
5973		     ptr_type_node);
5974  DECL_FIELD_CONTEXT (base) = record;
5975  DECL_CHAIN (base) = ofs;
5976
5977  TYPE_FIELDS (record) = base;
5978  layout_type (record);
5979
5980  va_list_gpr_counter_field = ofs;
5981  return record;
5982}
5983
5984#if TARGET_ABI_OSF
5985/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
5986   and constant additions.  */
5987
5988static gimple
5989va_list_skip_additions (tree lhs)
5990{
5991  gimple stmt;
5992
5993  for (;;)
5994    {
5995      enum tree_code code;
5996
5997      stmt = SSA_NAME_DEF_STMT (lhs);
5998
5999      if (gimple_code (stmt) == GIMPLE_PHI)
6000	return stmt;
6001
6002      if (!is_gimple_assign (stmt)
6003	  || gimple_assign_lhs (stmt) != lhs)
6004	return NULL;
6005
6006      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
6007	return stmt;
6008      code = gimple_assign_rhs_code (stmt);
6009      if (!CONVERT_EXPR_CODE_P (code)
6010	  && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
6011	      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
6012	      || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
6013	return stmt;
6014
6015      lhs = gimple_assign_rhs1 (stmt);
6016    }
6017}
6018
6019/* Check if LHS = RHS statement is
6020   LHS = *(ap.__base + ap.__offset + cst)
6021   or
6022   LHS = *(ap.__base
6023	   + ((ap.__offset + cst <= 47)
6024	      ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
6025   If the former, indicate that GPR registers are needed,
6026   if the latter, indicate that FPR registers are needed.
6027
6028   Also look for LHS = (*ptr).field, where ptr is one of the forms
6029   listed above.
6030
6031   On alpha, cfun->va_list_gpr_size is used as the size of the needed
6032   regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
6033   registers are needed and bit 1 set if FPR registers are needed.
6034   Return true if va_list references should not be scanned for the
6035   current statement.  */
6036
6037static bool
6038alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
6039{
6040  tree base, offset, rhs;
6041  int offset_arg = 1;
6042  gimple base_stmt;
6043
6044  if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
6045      != GIMPLE_SINGLE_RHS)
6046    return false;
6047
6048  rhs = gimple_assign_rhs1 (stmt);
6049  while (handled_component_p (rhs))
6050    rhs = TREE_OPERAND (rhs, 0);
6051  if (TREE_CODE (rhs) != MEM_REF
6052      || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
6053    return false;
6054
6055  stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
6056  if (stmt == NULL
6057      || !is_gimple_assign (stmt)
6058      || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
6059    return false;
6060
6061  base = gimple_assign_rhs1 (stmt);
6062  if (TREE_CODE (base) == SSA_NAME)
6063    {
6064      base_stmt = va_list_skip_additions (base);
6065      if (base_stmt
6066	  && is_gimple_assign (base_stmt)
6067	  && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6068	base = gimple_assign_rhs1 (base_stmt);
6069    }
6070
6071  if (TREE_CODE (base) != COMPONENT_REF
6072      || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6073    {
6074      base = gimple_assign_rhs2 (stmt);
6075      if (TREE_CODE (base) == SSA_NAME)
6076	{
6077	  base_stmt = va_list_skip_additions (base);
6078	  if (base_stmt
6079	      && is_gimple_assign (base_stmt)
6080	      && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6081	    base = gimple_assign_rhs1 (base_stmt);
6082	}
6083
6084      if (TREE_CODE (base) != COMPONENT_REF
6085	  || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6086	return false;
6087
6088      offset_arg = 0;
6089    }
6090
6091  base = get_base_address (base);
6092  if (TREE_CODE (base) != VAR_DECL
6093      || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
6094    return false;
6095
6096  offset = gimple_op (stmt, 1 + offset_arg);
6097  if (TREE_CODE (offset) == SSA_NAME)
6098    {
6099      gimple offset_stmt = va_list_skip_additions (offset);
6100
6101      if (offset_stmt
6102	  && gimple_code (offset_stmt) == GIMPLE_PHI)
6103	{
6104	  HOST_WIDE_INT sub;
6105	  gimple arg1_stmt, arg2_stmt;
6106	  tree arg1, arg2;
6107	  enum tree_code code1, code2;
6108
6109	  if (gimple_phi_num_args (offset_stmt) != 2)
6110	    goto escapes;
6111
6112	  arg1_stmt
6113	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6114	  arg2_stmt
6115	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6116	  if (arg1_stmt == NULL
6117	      || !is_gimple_assign (arg1_stmt)
6118	      || arg2_stmt == NULL
6119	      || !is_gimple_assign (arg2_stmt))
6120	    goto escapes;
6121
6122	  code1 = gimple_assign_rhs_code (arg1_stmt);
6123	  code2 = gimple_assign_rhs_code (arg2_stmt);
6124	  if (code1 == COMPONENT_REF
6125	      && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6126	    /* Do nothing.  */;
6127	  else if (code2 == COMPONENT_REF
6128		   && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6129	    {
6130	      gimple tem = arg1_stmt;
6131	      code2 = code1;
6132	      arg1_stmt = arg2_stmt;
6133	      arg2_stmt = tem;
6134	    }
6135	  else
6136	    goto escapes;
6137
6138	  if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6139	    goto escapes;
6140
6141	  sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6142	  if (code2 == MINUS_EXPR)
6143	    sub = -sub;
6144	  if (sub < -48 || sub > -32)
6145	    goto escapes;
6146
6147	  arg1 = gimple_assign_rhs1 (arg1_stmt);
6148	  arg2 = gimple_assign_rhs1 (arg2_stmt);
6149	  if (TREE_CODE (arg2) == SSA_NAME)
6150	    {
6151	      arg2_stmt = va_list_skip_additions (arg2);
6152	      if (arg2_stmt == NULL
6153		  || !is_gimple_assign (arg2_stmt)
6154		  || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6155		goto escapes;
6156	      arg2 = gimple_assign_rhs1 (arg2_stmt);
6157	    }
6158	  if (arg1 != arg2)
6159	    goto escapes;
6160
6161	  if (TREE_CODE (arg1) != COMPONENT_REF
6162	      || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6163	      || get_base_address (arg1) != base)
6164	    goto escapes;
6165
6166	  /* Need floating point regs.  */
6167	  cfun->va_list_fpr_size |= 2;
6168	  return false;
6169	}
6170      if (offset_stmt
6171	  && is_gimple_assign (offset_stmt)
6172	  && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6173	offset = gimple_assign_rhs1 (offset_stmt);
6174    }
6175  if (TREE_CODE (offset) != COMPONENT_REF
6176      || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6177      || get_base_address (offset) != base)
6178    goto escapes;
6179  else
6180    /* Need general regs.  */
6181    cfun->va_list_fpr_size |= 1;
6182  return false;
6183
6184escapes:
6185  si->va_list_escapes = true;
6186  return false;
6187}
6188#endif
6189
6190/* Perform any actions needed for a function that is receiving a
6191   variable number of arguments.  */
6192
6193static void
6194alpha_setup_incoming_varargs (cumulative_args_t pcum, machine_mode mode,
6195			      tree type, int *pretend_size, int no_rtl)
6196{
6197  CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6198
6199  /* Skip the current argument.  */
6200  targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6201				      true);
6202
6203#if TARGET_ABI_OPEN_VMS
6204  /* For VMS, we allocate space for all 6 arg registers plus a count.
6205
6206     However, if NO registers need to be saved, don't allocate any space.
6207     This is not only because we won't need the space, but because AP
6208     includes the current_pretend_args_size and we don't want to mess up
6209     any ap-relative addresses already made.  */
6210  if (cum.num_args < 6)
6211    {
6212      if (!no_rtl)
6213	{
6214	  emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6215	  emit_insn (gen_arg_home ());
6216	}
6217      *pretend_size = 7 * UNITS_PER_WORD;
6218    }
6219#else
6220  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6221     only push those that remain.  However, if NO registers need to
6222     be saved, don't allocate any space.  This is not only because we won't
6223     need the space, but because AP includes the current_pretend_args_size
6224     and we don't want to mess up any ap-relative addresses already made.
6225
6226     If we are not to use the floating-point registers, save the integer
6227     registers where we would put the floating-point registers.  This is
6228     not the most efficient way to implement varargs with just one register
6229     class, but it isn't worth doing anything more efficient in this rare
6230     case.  */
6231  if (cum >= 6)
6232    return;
6233
6234  if (!no_rtl)
6235    {
6236      int count;
6237      alias_set_type set = get_varargs_alias_set ();
6238      rtx tmp;
6239
6240      count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6241      if (count > 6 - cum)
6242	count = 6 - cum;
6243
6244      /* Detect whether integer registers or floating-point registers
6245	 are needed by the detected va_arg statements.  See above for
6246	 how these values are computed.  Note that the "escape" value
6247	 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6248	 these bits set.  */
6249      gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6250
6251      if (cfun->va_list_fpr_size & 1)
6252	{
6253	  tmp = gen_rtx_MEM (BLKmode,
6254			     plus_constant (Pmode, virtual_incoming_args_rtx,
6255					    (cum + 6) * UNITS_PER_WORD));
6256	  MEM_NOTRAP_P (tmp) = 1;
6257	  set_mem_alias_set (tmp, set);
6258	  move_block_from_reg (16 + cum, tmp, count);
6259	}
6260
6261      if (cfun->va_list_fpr_size & 2)
6262	{
6263	  tmp = gen_rtx_MEM (BLKmode,
6264			     plus_constant (Pmode, virtual_incoming_args_rtx,
6265					    cum * UNITS_PER_WORD));
6266	  MEM_NOTRAP_P (tmp) = 1;
6267	  set_mem_alias_set (tmp, set);
6268	  move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6269	}
6270     }
6271  *pretend_size = 12 * UNITS_PER_WORD;
6272#endif
6273}
6274
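/* Expand __builtin_va_start: point __base at the incoming argument area
   (adjusted as described below) and set __offset past the slots consumed
   by the named parameters.  */
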
6275static void
6276alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6277{
6278  HOST_WIDE_INT offset;
6279  tree t, offset_field, base_field;
6280
6281  if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6282    return;
6283
6284  /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6285     up by 48, storing fp arg registers in the first 48 bytes, and the
6286     integer arg registers in the next 48 bytes.  This is only done,
6287     however, if any integer registers need to be stored.
6288
6289     If no integer registers need be stored, then we must subtract 48
6290     in order to account for the integer arg registers which are counted
6291     in argsize above, but which are not actually stored on the stack.
6292     Must further be careful here about structures straddling the last
6293     integer argument register; that futzes with pretend_args_size,
6294     which changes the meaning of AP.  */
6295
6296  if (NUM_ARGS < 6)
6297    offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6298  else
6299    offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6300
6301  if (TARGET_ABI_OPEN_VMS)
6302    {
6303      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6304      t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6305      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6306      TREE_SIDE_EFFECTS (t) = 1;
6307      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6308    }
6309  else
6310    {
6311      base_field = TYPE_FIELDS (TREE_TYPE (valist));
6312      offset_field = DECL_CHAIN (base_field);
6313
6314      base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6315			   valist, base_field, NULL_TREE);
6316      offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6317			     valist, offset_field, NULL_TREE);
6318
6319      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6320      t = fold_build_pointer_plus_hwi (t, offset);
6321      t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6322      TREE_SIDE_EFFECTS (t) = 1;
6323      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6324
6325      t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6326      t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6327      TREE_SIDE_EFFECTS (t) = 1;
6328      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6329    }
6330}
6331
6332static tree
6333alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6334			 gimple_seq *pre_p)
6335{
6336  tree type_size, ptr_type, addend, t, addr;
6337  gimple_seq internal_post;
6338
6339  /* If the type could not be passed in registers, skip the block
6340     reserved for the registers.  */
6341  if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6342    {
6343      t = build_int_cst (TREE_TYPE (offset), 6*8);
6344      gimplify_assign (offset,
6345		       build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6346		       pre_p);
6347    }
6348
6349  addend = offset;
6350  ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6351
6352  if (TREE_CODE (type) == COMPLEX_TYPE)
6353    {
6354      tree real_part, imag_part, real_temp;
6355
6356      real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6357					   offset, pre_p);
6358
6359      /* Copy the value into a new temporary, lest the formal temporary
6360	 be reused out from under us.  */
6361      real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6362
6363      imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6364					   offset, pre_p);
6365
6366      return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6367    }
6368  else if (TREE_CODE (type) == REAL_TYPE)
6369    {
6370      tree fpaddend, cond, fourtyeight;
6371
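      /* Floating-point arguments that arrived in registers live in the
	 48-byte FP save area just below __base (see alpha_va_start), so
	 while the offset is still within the first six slots we index
	 with offset - 48 instead.  */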
6372      fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6373      fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6374			      addend, fourtyeight);
6375      cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6376      addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6377			    fpaddend, addend);
6378    }
6379
6380  /* Build the final address and force that value into a temporary.  */
6381  addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6382  internal_post = NULL;
6383  gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6384  gimple_seq_add_seq (pre_p, internal_post);
6385
6386  /* Update the offset field.  */
6387  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6388  if (type_size == NULL || TREE_OVERFLOW (type_size))
6389    t = size_zero_node;
6390  else
6391    {
6392      t = size_binop (PLUS_EXPR, type_size, size_int (7));
6393      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6394      t = size_binop (MULT_EXPR, t, size_int (8));
6395    }
6396  t = fold_convert (TREE_TYPE (offset), t);
6397  gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6398      		   pre_p);
6399
6400  return build_va_arg_indirect_ref (addr);
6401}
6402
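/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR for the two-field OSF va_list;
   VMS uses the standard implementation.  */
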
6403static tree
6404alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6405		       gimple_seq *post_p)
6406{
6407  tree offset_field, base_field, offset, base, t, r;
6408  bool indirect;
6409
6410  if (TARGET_ABI_OPEN_VMS)
6411    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6412
6413  base_field = TYPE_FIELDS (va_list_type_node);
6414  offset_field = DECL_CHAIN (base_field);
6415  base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6416		       valist, base_field, NULL_TREE);
6417  offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6418			 valist, offset_field, NULL_TREE);
6419
6420  /* Pull the fields of the structure out into temporaries.  Since we never
6421     modify the base field, we can use a formal temporary.  Sign-extend the
6422     offset field so that it's the proper width for pointer arithmetic.  */
6423  base = get_formal_tmp_var (base_field, pre_p);
6424
6425  t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6426  offset = get_initialized_tmp_var (t, pre_p, NULL);
6427
6428  indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6429  if (indirect)
6430    type = build_pointer_type_for_mode (type, ptr_mode, true);
6431
6432  /* Find the value.  Note that this will be a stable indirection, or
6433     a composite of stable indirections in the case of complex.  */
6434  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6435
6436  /* Stuff the offset temporary back into its field.  */
6437  gimplify_assign (unshare_expr (offset_field),
6438		   fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6439
6440  if (indirect)
6441    r = build_va_arg_indirect_ref (r);
6442
6443  return r;
6444}
6445
6446/* Builtins.  */
6447
6448enum alpha_builtin
6449{
6450  ALPHA_BUILTIN_CMPBGE,
6451  ALPHA_BUILTIN_EXTBL,
6452  ALPHA_BUILTIN_EXTWL,
6453  ALPHA_BUILTIN_EXTLL,
6454  ALPHA_BUILTIN_EXTQL,
6455  ALPHA_BUILTIN_EXTWH,
6456  ALPHA_BUILTIN_EXTLH,
6457  ALPHA_BUILTIN_EXTQH,
6458  ALPHA_BUILTIN_INSBL,
6459  ALPHA_BUILTIN_INSWL,
6460  ALPHA_BUILTIN_INSLL,
6461  ALPHA_BUILTIN_INSQL,
6462  ALPHA_BUILTIN_INSWH,
6463  ALPHA_BUILTIN_INSLH,
6464  ALPHA_BUILTIN_INSQH,
6465  ALPHA_BUILTIN_MSKBL,
6466  ALPHA_BUILTIN_MSKWL,
6467  ALPHA_BUILTIN_MSKLL,
6468  ALPHA_BUILTIN_MSKQL,
6469  ALPHA_BUILTIN_MSKWH,
6470  ALPHA_BUILTIN_MSKLH,
6471  ALPHA_BUILTIN_MSKQH,
6472  ALPHA_BUILTIN_UMULH,
6473  ALPHA_BUILTIN_ZAP,
6474  ALPHA_BUILTIN_ZAPNOT,
6475  ALPHA_BUILTIN_AMASK,
6476  ALPHA_BUILTIN_IMPLVER,
6477  ALPHA_BUILTIN_RPCC,
6478  ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6479  ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6480
6481  /* TARGET_MAX */
6482  ALPHA_BUILTIN_MINUB8,
6483  ALPHA_BUILTIN_MINSB8,
6484  ALPHA_BUILTIN_MINUW4,
6485  ALPHA_BUILTIN_MINSW4,
6486  ALPHA_BUILTIN_MAXUB8,
6487  ALPHA_BUILTIN_MAXSB8,
6488  ALPHA_BUILTIN_MAXUW4,
6489  ALPHA_BUILTIN_MAXSW4,
6490  ALPHA_BUILTIN_PERR,
6491  ALPHA_BUILTIN_PKLB,
6492  ALPHA_BUILTIN_PKWB,
6493  ALPHA_BUILTIN_UNPKBL,
6494  ALPHA_BUILTIN_UNPKBW,
6495
6496  /* TARGET_CIX */
6497  ALPHA_BUILTIN_CTTZ,
6498  ALPHA_BUILTIN_CTLZ,
6499  ALPHA_BUILTIN_CTPOP,
6500
6501  ALPHA_BUILTIN_max
6502};
6503
6504static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6505  CODE_FOR_builtin_cmpbge,
6506  CODE_FOR_extbl,
6507  CODE_FOR_extwl,
6508  CODE_FOR_extll,
6509  CODE_FOR_extql,
6510  CODE_FOR_extwh,
6511  CODE_FOR_extlh,
6512  CODE_FOR_extqh,
6513  CODE_FOR_builtin_insbl,
6514  CODE_FOR_builtin_inswl,
6515  CODE_FOR_builtin_insll,
6516  CODE_FOR_insql,
6517  CODE_FOR_inswh,
6518  CODE_FOR_inslh,
6519  CODE_FOR_insqh,
6520  CODE_FOR_mskbl,
6521  CODE_FOR_mskwl,
6522  CODE_FOR_mskll,
6523  CODE_FOR_mskql,
6524  CODE_FOR_mskwh,
6525  CODE_FOR_msklh,
6526  CODE_FOR_mskqh,
6527  CODE_FOR_umuldi3_highpart,
6528  CODE_FOR_builtin_zap,
6529  CODE_FOR_builtin_zapnot,
6530  CODE_FOR_builtin_amask,
6531  CODE_FOR_builtin_implver,
6532  CODE_FOR_builtin_rpcc,
6533  CODE_FOR_builtin_establish_vms_condition_handler,
6534  CODE_FOR_builtin_revert_vms_condition_handler,
6535
6536  /* TARGET_MAX */
6537  CODE_FOR_builtin_minub8,
6538  CODE_FOR_builtin_minsb8,
6539  CODE_FOR_builtin_minuw4,
6540  CODE_FOR_builtin_minsw4,
6541  CODE_FOR_builtin_maxub8,
6542  CODE_FOR_builtin_maxsb8,
6543  CODE_FOR_builtin_maxuw4,
6544  CODE_FOR_builtin_maxsw4,
6545  CODE_FOR_builtin_perr,
6546  CODE_FOR_builtin_pklb,
6547  CODE_FOR_builtin_pkwb,
6548  CODE_FOR_builtin_unpkbl,
6549  CODE_FOR_builtin_unpkbw,
6550
6551  /* TARGET_CIX */
6552  CODE_FOR_ctzdi2,
6553  CODE_FOR_clzdi2,
6554  CODE_FOR_popcountdi2
6555};
6556
6557struct alpha_builtin_def
6558{
6559  const char *name;
6560  enum alpha_builtin code;
6561  unsigned int target_mask;
6562  bool is_const;
6563};
6564
6565static struct alpha_builtin_def const zero_arg_builtins[] = {
6566  { "__builtin_alpha_implver",	ALPHA_BUILTIN_IMPLVER,	0, true },
6567  { "__builtin_alpha_rpcc",	ALPHA_BUILTIN_RPCC,	0, false }
6568};
6569
6570static struct alpha_builtin_def const one_arg_builtins[] = {
6571  { "__builtin_alpha_amask",	ALPHA_BUILTIN_AMASK,	0, true },
6572  { "__builtin_alpha_pklb",	ALPHA_BUILTIN_PKLB,	MASK_MAX, true },
6573  { "__builtin_alpha_pkwb",	ALPHA_BUILTIN_PKWB,	MASK_MAX, true },
6574  { "__builtin_alpha_unpkbl",	ALPHA_BUILTIN_UNPKBL,	MASK_MAX, true },
6575  { "__builtin_alpha_unpkbw",	ALPHA_BUILTIN_UNPKBW,	MASK_MAX, true },
6576  { "__builtin_alpha_cttz",	ALPHA_BUILTIN_CTTZ,	MASK_CIX, true },
6577  { "__builtin_alpha_ctlz",	ALPHA_BUILTIN_CTLZ,	MASK_CIX, true },
6578  { "__builtin_alpha_ctpop",	ALPHA_BUILTIN_CTPOP,	MASK_CIX, true }
6579};
6580
6581static struct alpha_builtin_def const two_arg_builtins[] = {
6582  { "__builtin_alpha_cmpbge",	ALPHA_BUILTIN_CMPBGE,	0, true },
6583  { "__builtin_alpha_extbl",	ALPHA_BUILTIN_EXTBL,	0, true },
6584  { "__builtin_alpha_extwl",	ALPHA_BUILTIN_EXTWL,	0, true },
6585  { "__builtin_alpha_extll",	ALPHA_BUILTIN_EXTLL,	0, true },
6586  { "__builtin_alpha_extql",	ALPHA_BUILTIN_EXTQL,	0, true },
6587  { "__builtin_alpha_extwh",	ALPHA_BUILTIN_EXTWH,	0, true },
6588  { "__builtin_alpha_extlh",	ALPHA_BUILTIN_EXTLH,	0, true },
6589  { "__builtin_alpha_extqh",	ALPHA_BUILTIN_EXTQH,	0, true },
6590  { "__builtin_alpha_insbl",	ALPHA_BUILTIN_INSBL,	0, true },
6591  { "__builtin_alpha_inswl",	ALPHA_BUILTIN_INSWL,	0, true },
6592  { "__builtin_alpha_insll",	ALPHA_BUILTIN_INSLL,	0, true },
6593  { "__builtin_alpha_insql",	ALPHA_BUILTIN_INSQL,	0, true },
6594  { "__builtin_alpha_inswh",	ALPHA_BUILTIN_INSWH,	0, true },
6595  { "__builtin_alpha_inslh",	ALPHA_BUILTIN_INSLH,	0, true },
6596  { "__builtin_alpha_insqh",	ALPHA_BUILTIN_INSQH,	0, true },
6597  { "__builtin_alpha_mskbl",	ALPHA_BUILTIN_MSKBL,	0, true },
6598  { "__builtin_alpha_mskwl",	ALPHA_BUILTIN_MSKWL,	0, true },
6599  { "__builtin_alpha_mskll",	ALPHA_BUILTIN_MSKLL,	0, true },
6600  { "__builtin_alpha_mskql",	ALPHA_BUILTIN_MSKQL,	0, true },
6601  { "__builtin_alpha_mskwh",	ALPHA_BUILTIN_MSKWH,	0, true },
6602  { "__builtin_alpha_msklh",	ALPHA_BUILTIN_MSKLH,	0, true },
6603  { "__builtin_alpha_mskqh",	ALPHA_BUILTIN_MSKQH,	0, true },
6604  { "__builtin_alpha_umulh",	ALPHA_BUILTIN_UMULH,	0, true },
6605  { "__builtin_alpha_zap",	ALPHA_BUILTIN_ZAP,	0, true },
6606  { "__builtin_alpha_zapnot",	ALPHA_BUILTIN_ZAPNOT,	0, true },
6607  { "__builtin_alpha_minub8",	ALPHA_BUILTIN_MINUB8,	MASK_MAX, true },
6608  { "__builtin_alpha_minsb8",	ALPHA_BUILTIN_MINSB8,	MASK_MAX, true },
6609  { "__builtin_alpha_minuw4",	ALPHA_BUILTIN_MINUW4,	MASK_MAX, true },
6610  { "__builtin_alpha_minsw4",	ALPHA_BUILTIN_MINSW4,	MASK_MAX, true },
6611  { "__builtin_alpha_maxub8",	ALPHA_BUILTIN_MAXUB8,	MASK_MAX, true },
6612  { "__builtin_alpha_maxsb8",	ALPHA_BUILTIN_MAXSB8,	MASK_MAX, true },
6613  { "__builtin_alpha_maxuw4",	ALPHA_BUILTIN_MAXUW4,	MASK_MAX, true },
6614  { "__builtin_alpha_maxsw4",	ALPHA_BUILTIN_MAXSW4,	MASK_MAX, true },
6615  { "__builtin_alpha_perr",	ALPHA_BUILTIN_PERR,	MASK_MAX, true }
6616};
6617
6618static GTY(()) tree alpha_dimode_u;
6619static GTY(()) tree alpha_v8qi_u;
6620static GTY(()) tree alpha_v8qi_s;
6621static GTY(()) tree alpha_v4hi_u;
6622static GTY(()) tree alpha_v4hi_s;
6623
6624static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6625
6626/* Return the alpha builtin for CODE.  */
6627
6628static tree
6629alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6630{
6631  if (code >= ALPHA_BUILTIN_max)
6632    return error_mark_node;
6633  return alpha_builtins[code];
6634}
6635
6636/* Helper function of alpha_init_builtins.  Add the built-in specified
6637   by NAME, FTYPE, CODE, and ECF.  */
6638
6639static void
6640alpha_builtin_function (const char *name, tree ftype,
6641			enum alpha_builtin code, unsigned ecf)
6642{
6643  tree decl = add_builtin_function (name, ftype, (int) code,
6644				    BUILT_IN_MD, NULL, NULL_TREE);
6645
6646  if (ecf & ECF_CONST)
6647    TREE_READONLY (decl) = 1;
6648  if (ecf & ECF_NOTHROW)
6649    TREE_NOTHROW (decl) = 1;
6650
6651  alpha_builtins [(int) code] = decl;
6652}
6653
6654/* Helper function of alpha_init_builtins.  Add the COUNT built-in
6655   functions pointed to by P, with function type FTYPE.  */
6656
6657static void
6658alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6659		    tree ftype)
6660{
6661  size_t i;
6662
6663  for (i = 0; i < count; ++i, ++p)
6664    if ((target_flags & p->target_mask) == p->target_mask)
6665      alpha_builtin_function (p->name, ftype, p->code,
6666			      (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6667}
6668
6669static void
6670alpha_init_builtins (void)
6671{
6672  tree ftype;
6673
6674  alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6675  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6676  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6677  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6678  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6679
6680  ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6681  alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6682
6683  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6684  alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6685
6686  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6687				    alpha_dimode_u, NULL_TREE);
6688  alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6689
6690  if (TARGET_ABI_OPEN_VMS)
6691    {
6692      ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6693					NULL_TREE);
6694      alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6695			      ftype,
6696			      ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6697			      0);
6698
6699      ftype = build_function_type_list (ptr_type_node, void_type_node,
6700					NULL_TREE);
6701      alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6702			      ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6703
6704      vms_patch_builtins ();
6705    }
6706}
6707
6708/* Expand an expression EXP that calls a built-in function,
6709   with result going to TARGET if that's convenient
6710   (and in mode MODE if that's convenient).
6711   SUBTARGET may be used as the target for computing one of EXP's operands.
6712   IGNORE is nonzero if the value is to be ignored.  */
6713
6714static rtx
6715alpha_expand_builtin (tree exp, rtx target,
6716		      rtx subtarget ATTRIBUTE_UNUSED,
6717		      machine_mode mode ATTRIBUTE_UNUSED,
6718		      int ignore ATTRIBUTE_UNUSED)
6719{
6720#define MAX_ARGS 2
6721
6722  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6723  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6724  tree arg;
6725  call_expr_arg_iterator iter;
6726  enum insn_code icode;
6727  rtx op[MAX_ARGS], pat;
6728  int arity;
6729  bool nonvoid;
6730
6731  if (fcode >= ALPHA_BUILTIN_max)
6732    internal_error ("bad builtin fcode");
6733  icode = code_for_builtin[fcode];
6734  if (icode == 0)
6735    internal_error ("bad builtin fcode");
6736
6737  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6738
6739  arity = 0;
6740  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6741    {
6742      const struct insn_operand_data *insn_op;
6743
6744      if (arg == error_mark_node)
6745	return NULL_RTX;
6746      if (arity >= MAX_ARGS)
6747	return NULL_RTX;
6748
6749      insn_op = &insn_data[icode].operand[arity + nonvoid];
6750
6751      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6752
6753      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6754	op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6755      arity++;
6756    }
6757
6758  if (nonvoid)
6759    {
6760      machine_mode tmode = insn_data[icode].operand[0].mode;
6761      if (!target
6762	  || GET_MODE (target) != tmode
6763	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6764	target = gen_reg_rtx (tmode);
6765    }
6766
6767  switch (arity)
6768    {
6769    case 0:
6770      pat = GEN_FCN (icode) (target);
6771      break;
6772    case 1:
6773      if (nonvoid)
6774        pat = GEN_FCN (icode) (target, op[0]);
6775      else
6776	pat = GEN_FCN (icode) (op[0]);
6777      break;
6778    case 2:
6779      pat = GEN_FCN (icode) (target, op[0], op[1]);
6780      break;
6781    default:
6782      gcc_unreachable ();
6783    }
6784  if (!pat)
6785    return NULL_RTX;
6786  emit_insn (pat);
6787
6788  if (nonvoid)
6789    return target;
6790  else
6791    return const0_rtx;
6792}
6793
6794
6795/* Several bits below assume HWI >= 64 bits.  This should be enforced
6796   by config.gcc.  */
6797#if HOST_BITS_PER_WIDE_INT < 64
6798# error "HOST_WIDE_INT too small"
6799#endif
6800
6801/* Fold the builtin for the CMPBGE instruction.  This is a vector comparison
6802   with an 8-bit output vector.  OPINT contains the integer operands; bit N
6803   of OP_CONST is set if OPINT[N] is valid.  */
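/* For instance, folding __builtin_alpha_cmpbge (0x00ff, 0xff00) sets bit 0
   (0xff >= 0x00), clears bit 1 (0x00 < 0xff) and sets bits 2-7 for the
   remaining zero bytes, yielding 0xfd.  */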
6804
6805static tree
6806alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6807{
6808  if (op_const == 3)
6809    {
6810      int i, val;
6811      for (i = 0, val = 0; i < 8; ++i)
6812	{
6813	  unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6814	  unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6815	  if (c0 >= c1)
6816	    val |= 1 << i;
6817	}
6818      return build_int_cst (alpha_dimode_u, val);
6819    }
6820  else if (op_const == 2 && opint[1] == 0)
6821    return build_int_cst (alpha_dimode_u, 0xff);
6822  return NULL;
6823}
6824
6825/* Fold the builtin for the ZAPNOT instruction.  This is essentially a
6826   specialized form of an AND operation.  Other byte manipulation instructions
6827   are defined in terms of this instruction, so this is also used as a
6828   subroutine for other builtins.
6829
6830   OP contains the tree operands; OPINT contains the extracted integer values.
6831   Bit N of OP_CONST is set if OPINT[N] is valid.  OP may be null if only
6832   OPINT may be considered.  */
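/* For example, zapnot (x, 0x0f) keeps the low four bytes of x, i.e. it is
   x & 0x00000000ffffffff, and zap (x, m) is zapnot (x, ~m & 0xff).  */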
6833
6834static tree
6835alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6836			   long op_const)
6837{
6838  if (op_const & 2)
6839    {
6840      unsigned HOST_WIDE_INT mask = 0;
6841      int i;
6842
6843      for (i = 0; i < 8; ++i)
6844	if ((opint[1] >> i) & 1)
6845	  mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6846
6847      if (op_const & 1)
6848	return build_int_cst (alpha_dimode_u, opint[0] & mask);
6849
6850      if (op)
6851	return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6852			    build_int_cst (alpha_dimode_u, mask));
6853    }
6854  else if ((op_const & 1) && opint[0] == 0)
6855    return build_int_cst (alpha_dimode_u, 0);
6856  return NULL;
6857}
6858
6859/* Fold the builtins for the EXT family of instructions.  */
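/* When both operands are constant, the fold below evaluates extwl (x, p)
   as zapnot (x >> ((p & 7) * 8), 0x03): shift the source right by the byte
   offset and keep the low two bytes; the *H variants shift left instead.  */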
6860
6861static tree
6862alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6863			  long op_const, unsigned HOST_WIDE_INT bytemask,
6864			  bool is_high)
6865{
6866  long zap_const = 2;
6867  tree *zap_op = NULL;
6868
6869  if (op_const & 2)
6870    {
6871      unsigned HOST_WIDE_INT loc;
6872
6873      loc = opint[1] & 7;
6874      loc *= BITS_PER_UNIT;
6875
6876      if (loc != 0)
6877	{
6878	  if (op_const & 1)
6879	    {
6880	      unsigned HOST_WIDE_INT temp = opint[0];
6881	      if (is_high)
6882		temp <<= loc;
6883	      else
6884		temp >>= loc;
6885	      opint[0] = temp;
6886	      zap_const = 3;
6887	    }
6888	}
6889      else
6890	zap_op = op;
6891    }
6892
6893  opint[1] = bytemask;
6894  return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6895}
6896
6897/* Fold the builtins for the INS family of instructions.  */
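/* When both operands are constant, the fold below evaluates inswl (x, p)
   as zapnot (x << ((p & 7) * 8), 0x03 << (p & 7)): shift the low two bytes
   of the source into place and keep only those byte lanes.  */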
6898
6899static tree
6900alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6901			  long op_const, unsigned HOST_WIDE_INT bytemask,
6902			  bool is_high)
6903{
6904  if ((op_const & 1) && opint[0] == 0)
6905    return build_int_cst (alpha_dimode_u, 0);
6906
6907  if (op_const & 2)
6908    {
6909      unsigned HOST_WIDE_INT temp, loc, byteloc;
6910      tree *zap_op = NULL;
6911
6912      loc = opint[1] & 7;
6913      bytemask <<= loc;
6914
6915      temp = opint[0];
6916      if (is_high)
6917	{
6918	  byteloc = (64 - (loc * 8)) & 0x3f;
6919	  if (byteloc == 0)
6920	    zap_op = op;
6921	  else
6922	    temp >>= byteloc;
6923	  bytemask >>= 8;
6924	}
6925      else
6926	{
6927	  byteloc = loc * 8;
6928	  if (byteloc == 0)
6929	    zap_op = op;
6930	  else
6931	    temp <<= byteloc;
6932	}
6933
6934      opint[0] = temp;
6935      opint[1] = bytemask;
6936      return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6937    }
6938
6939  return NULL;
6940}
6941
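/* Fold the builtins for the MSK family of instructions.  For a constant
   position P, mskbl (x, p) simply clears byte (p & 7) of x.  */
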
6942static tree
6943alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6944			  long op_const, unsigned HOST_WIDE_INT bytemask,
6945			  bool is_high)
6946{
6947  if (op_const & 2)
6948    {
6949      unsigned HOST_WIDE_INT loc;
6950
6951      loc = opint[1] & 7;
6952      bytemask <<= loc;
6953
6954      if (is_high)
6955	bytemask >>= 8;
6956
6957      opint[1] = bytemask ^ 0xff;
6958    }
6959
6960  return alpha_fold_builtin_zapnot (op, opint, op_const);
6961}
6962
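/* Fold one of the vector MIN/MAX builtins by applying CODE to the
   operands viewed as vectors of type VTYPE.  */
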
6963static tree
6964alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6965{
6966  tree op0 = fold_convert (vtype, op[0]);
6967  tree op1 = fold_convert (vtype, op[1]);
6968  tree val = fold_build2 (code, vtype, op0, op1);
6969  return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6970}
6971
6972static tree
6973alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6974{
6975  unsigned HOST_WIDE_INT temp = 0;
6976  int i;
6977
6978  if (op_const != 3)
6979    return NULL;
6980
6981  for (i = 0; i < 8; ++i)
6982    {
6983      unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6984      unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6985      if (a >= b)
6986	temp += a - b;
6987      else
6988	temp += b - a;
6989    }
6990
6991  return build_int_cst (alpha_dimode_u, temp);
6992}
6993
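/* Fold the PKLB builtin: pack the low byte of each longword of the
   argument into the low two bytes of the result.  */
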
6994static tree
6995alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6996{
6997  unsigned HOST_WIDE_INT temp;
6998
6999  if (op_const == 0)
7000    return NULL;
7001
7002  temp = opint[0] & 0xff;
7003  temp |= (opint[0] >> 24) & 0xff00;
7004
7005  return build_int_cst (alpha_dimode_u, temp);
7006}
7007
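/* Fold the PKWB builtin: pack the low byte of each word of the argument
   into the low four bytes of the result.  */
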
7008static tree
7009alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
7010{
7011  unsigned HOST_WIDE_INT temp;
7012
7013  if (op_const == 0)
7014    return NULL;
7015
7016  temp = opint[0] & 0xff;
7017  temp |= (opint[0] >>  8) & 0xff00;
7018  temp |= (opint[0] >> 16) & 0xff0000;
7019  temp |= (opint[0] >> 24) & 0xff000000;
7020
7021  return build_int_cst (alpha_dimode_u, temp);
7022}
7023
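/* Fold the UNPKBL builtin: spread the low two bytes of the argument into
   the low byte of each longword of the result.  */
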
7024static tree
7025alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
7026{
7027  unsigned HOST_WIDE_INT temp;
7028
7029  if (op_const == 0)
7030    return NULL;
7031
7032  temp = opint[0] & 0xff;
7033  temp |= (opint[0] & 0xff00) << 24;
7034
7035  return build_int_cst (alpha_dimode_u, temp);
7036}
7037
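/* Fold the UNPKBW builtin: spread the low four bytes of the argument into
   the low byte of each word of the result.  */
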
7038static tree
7039alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
7040{
7041  unsigned HOST_WIDE_INT temp;
7042
7043  if (op_const == 0)
7044    return NULL;
7045
7046  temp = opint[0] & 0xff;
7047  temp |= (opint[0] & 0x0000ff00) << 8;
7048  temp |= (opint[0] & 0x00ff0000) << 16;
7049  temp |= (opint[0] & 0xff000000) << 24;
7050
7051  return build_int_cst (alpha_dimode_u, temp);
7052}
7053
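/* Fold the CIX bit-counting builtins.  Note that CTTZ and CTLZ of zero
   fold to 64.  */
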
7054static tree
7055alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
7056{
7057  unsigned HOST_WIDE_INT temp;
7058
7059  if (op_const == 0)
7060    return NULL;
7061
7062  if (opint[0] == 0)
7063    temp = 64;
7064  else
7065    temp = exact_log2 (opint[0] & -opint[0]);
7066
7067  return build_int_cst (alpha_dimode_u, temp);
7068}
7069
7070static tree
7071alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
7072{
7073  unsigned HOST_WIDE_INT temp;
7074
7075  if (op_const == 0)
7076    return NULL;
7077
7078  if (opint[0] == 0)
7079    temp = 64;
7080  else
7081    temp = 64 - floor_log2 (opint[0]) - 1;
7082
7083  return build_int_cst (alpha_dimode_u, temp);
7084}
7085
7086static tree
7087alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7088{
7089  unsigned HOST_WIDE_INT temp, op;
7090
7091  if (op_const == 0)
7092    return NULL;
7093
7094  op = opint[0];
7095  temp = 0;
7096  while (op)
7097    temp++, op &= op - 1;
7098
7099  return build_int_cst (alpha_dimode_u, temp);
7100}
7101
7102/* Fold one of our builtin functions.  */
7103
7104static tree
7105alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7106		    bool ignore ATTRIBUTE_UNUSED)
7107{
7108  unsigned HOST_WIDE_INT opint[MAX_ARGS];
7109  long op_const = 0;
7110  int i;
7111
7112  if (n_args > MAX_ARGS)
7113    return NULL;
7114
7115  for (i = 0; i < n_args; i++)
7116    {
7117      tree arg = op[i];
7118      if (arg == error_mark_node)
7119	return NULL;
7120
7121      opint[i] = 0;
7122      if (TREE_CODE (arg) == INTEGER_CST)
7123	{
7124          op_const |= 1L << i;
7125	  opint[i] = int_cst_value (arg);
7126	}
7127    }
7128
7129  switch (DECL_FUNCTION_CODE (fndecl))
7130    {
7131    case ALPHA_BUILTIN_CMPBGE:
7132      return alpha_fold_builtin_cmpbge (opint, op_const);
7133
7134    case ALPHA_BUILTIN_EXTBL:
7135      return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7136    case ALPHA_BUILTIN_EXTWL:
7137      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7138    case ALPHA_BUILTIN_EXTLL:
7139      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7140    case ALPHA_BUILTIN_EXTQL:
7141      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7142    case ALPHA_BUILTIN_EXTWH:
7143      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7144    case ALPHA_BUILTIN_EXTLH:
7145      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7146    case ALPHA_BUILTIN_EXTQH:
7147      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7148
7149    case ALPHA_BUILTIN_INSBL:
7150      return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7151    case ALPHA_BUILTIN_INSWL:
7152      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7153    case ALPHA_BUILTIN_INSLL:
7154      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7155    case ALPHA_BUILTIN_INSQL:
7156      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7157    case ALPHA_BUILTIN_INSWH:
7158      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7159    case ALPHA_BUILTIN_INSLH:
7160      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7161    case ALPHA_BUILTIN_INSQH:
7162      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7163
7164    case ALPHA_BUILTIN_MSKBL:
7165      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7166    case ALPHA_BUILTIN_MSKWL:
7167      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7168    case ALPHA_BUILTIN_MSKLL:
7169      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7170    case ALPHA_BUILTIN_MSKQL:
7171      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7172    case ALPHA_BUILTIN_MSKWH:
7173      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7174    case ALPHA_BUILTIN_MSKLH:
7175      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7176    case ALPHA_BUILTIN_MSKQH:
7177      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7178
7179    case ALPHA_BUILTIN_ZAP:
7180      opint[1] ^= 0xff;
7181      /* FALLTHRU */
7182    case ALPHA_BUILTIN_ZAPNOT:
7183      return alpha_fold_builtin_zapnot (op, opint, op_const);
7184
7185    case ALPHA_BUILTIN_MINUB8:
7186      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7187    case ALPHA_BUILTIN_MINSB8:
7188      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7189    case ALPHA_BUILTIN_MINUW4:
7190      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7191    case ALPHA_BUILTIN_MINSW4:
7192      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7193    case ALPHA_BUILTIN_MAXUB8:
7194      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7195    case ALPHA_BUILTIN_MAXSB8:
7196      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7197    case ALPHA_BUILTIN_MAXUW4:
7198      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7199    case ALPHA_BUILTIN_MAXSW4:
7200      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7201
7202    case ALPHA_BUILTIN_PERR:
7203      return alpha_fold_builtin_perr (opint, op_const);
7204    case ALPHA_BUILTIN_PKLB:
7205      return alpha_fold_builtin_pklb (opint, op_const);
7206    case ALPHA_BUILTIN_PKWB:
7207      return alpha_fold_builtin_pkwb (opint, op_const);
7208    case ALPHA_BUILTIN_UNPKBL:
7209      return alpha_fold_builtin_unpkbl (opint, op_const);
7210    case ALPHA_BUILTIN_UNPKBW:
7211      return alpha_fold_builtin_unpkbw (opint, op_const);
7212
7213    case ALPHA_BUILTIN_CTTZ:
7214      return alpha_fold_builtin_cttz (opint, op_const);
7215    case ALPHA_BUILTIN_CTLZ:
7216      return alpha_fold_builtin_ctlz (opint, op_const);
7217    case ALPHA_BUILTIN_CTPOP:
7218      return alpha_fold_builtin_ctpop (opint, op_const);
7219
7220    case ALPHA_BUILTIN_AMASK:
7221    case ALPHA_BUILTIN_IMPLVER:
7222    case ALPHA_BUILTIN_RPCC:
7223      /* None of these are foldable at compile-time.  */
7224    default:
7225      return NULL;
7226    }
7227}
7228
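/* Fold a builtin call at GIMPLE level: rewrite a call to
   __builtin_alpha_umulh into an equivalent MULT_HIGHPART_EXPR
   assignment.  */
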
7229bool
7230alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7231{
7232  bool changed = false;
7233  gimple stmt = gsi_stmt (*gsi);
7234  tree call = gimple_call_fn (stmt);
7235  gimple new_stmt = NULL;
7236
7237  if (call)
7238    {
7239      tree fndecl = gimple_call_fndecl (stmt);
7240
7241      if (fndecl)
7242	{
7243	  tree arg0, arg1;
7244
7245	  switch (DECL_FUNCTION_CODE (fndecl))
7246	    {
7247	    case ALPHA_BUILTIN_UMULH:
7248	      arg0 = gimple_call_arg (stmt, 0);
7249	      arg1 = gimple_call_arg (stmt, 1);
7250
7251	      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7252					      MULT_HIGHPART_EXPR, arg0, arg1);
7253	      break;
7254	    default:
7255	      break;
7256	    }
7257	}
7258    }
7259
7260  if (new_stmt)
7261    {
7262      gsi_replace (gsi, new_stmt, true);
7263      changed = true;
7264    }
7265
7266  return changed;
7267}
7268
7269/* This page contains routines that are used to determine what the function
7270   prologue and epilogue code will do and write them out.  */
7271
7272/* Compute the size of the save area in the stack.  */
7273
7274/* These variables are used for communication between the following functions.
7275   They indicate various things about the current function being compiled
7276   that are used to tell what kind of prologue, epilogue and procedure
7277   descriptor to generate.  */
7278
7279/* Nonzero if we need a stack procedure.  */
7280enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7281static enum alpha_procedure_types alpha_procedure_type;
7282
7283/* Register number (either FP or SP) that is used to unwind the frame.  */
7284static int vms_unwind_regno;
7285
7286/* Register number used to save FP.  We need not have one for RA since
7287   we don't modify it for register procedures.  This is only defined
7288   for register frame procedures.  */
7289static int vms_save_fp_regno;
7290
7291/* Register number used to reference objects off our PV.  */
7292static int vms_base_regno;
7293
7294/* Compute register masks for saved registers.  */
7295
7296static void
7297alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7298{
7299  unsigned long imask = 0;
7300  unsigned long fmask = 0;
7301  unsigned int i;
7302
7303  /* When outputting a thunk, we don't have valid register life info,
7304     but assemble_start_function wants to output .frame and .mask
7305     directives.  */
7306  if (cfun->is_thunk)
7307    {
7308      *imaskP = 0;
7309      *fmaskP = 0;
7310      return;
7311    }
7312
7313  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7314    imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7315
7316  /* One for every register we have to save.  */
7317  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7318    if (! fixed_regs[i] && ! call_used_regs[i]
7319	&& df_regs_ever_live_p (i) && i != REG_RA)
7320      {
7321	if (i < 32)
7322	  imask |= (1UL << i);
7323	else
7324	  fmask |= (1UL << (i - 32));
7325      }
7326
7327  /* We need to restore these for the handler.  */
7328  if (crtl->calls_eh_return)
7329    {
7330      for (i = 0; ; ++i)
7331	{
7332	  unsigned regno = EH_RETURN_DATA_REGNO (i);
7333	  if (regno == INVALID_REGNUM)
7334	    break;
7335	  imask |= 1UL << regno;
7336	}
7337    }
7338
7339  /* If any register spilled, then spill the return address also.  */
7340  /* ??? This is required by the Digital stack unwind specification
7341     and isn't needed if we're doing Dwarf2 unwinding.  */
7342  if (imask || fmask || alpha_ra_ever_killed ())
7343    imask |= (1UL << REG_RA);
7344
7345  *imaskP = imask;
7346  *fmaskP = fmask;
7347}
7348
7349int
7350alpha_sa_size (void)
7351{
7352  unsigned long mask[2];
7353  int sa_size = 0;
7354  int i, j;
7355
7356  alpha_sa_mask (&mask[0], &mask[1]);
7357
7358  for (j = 0; j < 2; ++j)
7359    for (i = 0; i < 32; ++i)
7360      if ((mask[j] >> i) & 1)
7361	sa_size++;
7362
7363  if (TARGET_ABI_OPEN_VMS)
7364    {
7365      /* Use a stack procedure if we make any calls (REG_RA used) or need
7366	 a frame pointer, a register procedure if we otherwise need any frame
7367	 space at all, and a null procedure in the remaining cases.  */
7368      if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7369	alpha_procedure_type = PT_STACK;
7370      else if (get_frame_size () != 0)
7371	alpha_procedure_type = PT_REGISTER;
7372      else
7373	alpha_procedure_type = PT_NULL;
7374
7375      /* Don't reserve space for saving FP & RA yet.  Do that later after we've
7376	 made the final decision on stack procedure vs register procedure.  */
7377      if (alpha_procedure_type == PT_STACK)
7378	sa_size -= 2;
7379
7380      /* Decide whether to refer to objects off our PV via FP or PV.
7381	 If we need FP for something else or if we receive a nonlocal
7382	 goto (which expects PV to contain the value), we must use PV.
7383	 Otherwise, start by assuming we can use FP.  */
7384
7385      vms_base_regno
7386	= (frame_pointer_needed
7387	   || cfun->has_nonlocal_label
7388	   || alpha_procedure_type == PT_STACK
7389	   || crtl->outgoing_args_size)
7390	  ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7391
7392      /* If we want to copy PV into FP, we need to find some register
7393	 in which to save FP.  */
7394
7395      vms_save_fp_regno = -1;
7396      if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7397	for (i = 0; i < 32; i++)
7398	  if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7399	    vms_save_fp_regno = i;
7400
7401      /* A VMS condition handler requires a stack procedure in our
7402	 implementation (the calling standard itself does not require one).  */
7403      if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7404	  || cfun->machine->uses_condition_handler)
7405	vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7406      else if (alpha_procedure_type == PT_NULL)
7407	vms_base_regno = REG_PV;
7408
7409      /* Stack unwinding should be done via FP unless we use it for PV.  */
7410      vms_unwind_regno = (vms_base_regno == REG_PV
7411			  ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7412
7413      /* If this is a stack procedure, allow space for saving FP, RA and
7414	 a condition handler slot if needed.  */
7415      if (alpha_procedure_type == PT_STACK)
7416	sa_size += 2 + cfun->machine->uses_condition_handler;
7417    }
7418  else
7419    {
7420      /* Our size must be even (multiple of 16 bytes).  */
7421      if (sa_size & 1)
7422	sa_size++;
7423    }
7424
7425  return sa_size * 8;
7426}
7427
7428/* Define the offset between two registers, one to be eliminated,
7429   and the other its replacement, at the start of a routine.  */
7430
7431HOST_WIDE_INT
7432alpha_initial_elimination_offset (unsigned int from,
7433				  unsigned int to ATTRIBUTE_UNUSED)
7434{
7435  HOST_WIDE_INT ret;
7436
7437  ret = alpha_sa_size ();
7438  ret += ALPHA_ROUND (crtl->outgoing_args_size);
7439
7440  switch (from)
7441    {
7442    case FRAME_POINTER_REGNUM:
7443      break;
7444
7445    case ARG_POINTER_REGNUM:
7446      ret += (ALPHA_ROUND (get_frame_size ()
7447			   + crtl->args.pretend_args_size)
7448	      - crtl->args.pretend_args_size);
7449      break;
7450
7451    default:
7452      gcc_unreachable ();
7453    }
7454
7455  return ret;
7456}
7457
7458#if TARGET_ABI_OPEN_VMS
7459
7460/* Worker function for TARGET_CAN_ELIMINATE.  */
7461
7462static bool
7463alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7464{
7465  /* We need the alpha_procedure_type to decide. Evaluate it now.  */
7466  alpha_sa_size ();
7467
7468  switch (alpha_procedure_type)
7469    {
7470    case PT_NULL:
7471      /* NULL procedures have no frame of their own and we only
7472	 know how to resolve from the current stack pointer.  */
7473      return to == STACK_POINTER_REGNUM;
7474
7475    case PT_REGISTER:
7476    case PT_STACK:
7477      /* We can always eliminate to the hard frame pointer.  Eliminating to
7478	 the stack pointer is only allowed if it is also the unwind base.  */
7479      return (to != STACK_POINTER_REGNUM
7480	      || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7481    }
7482
7483  gcc_unreachable ();
7484}
7485
7486/* FROM is to be eliminated in favor of TO.  Return the offset so that TO+offset
7487   designates the same location as FROM.  */
7488
7489HOST_WIDE_INT
7490alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7491{
7492  /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7493     HARD_FRAME or STACK_PTR.  We need the alpha_procedure_type to decide
7494     on the proper computations and will need the register save area size
7495     in most cases.  */
7496
7497  HOST_WIDE_INT sa_size = alpha_sa_size ();
7498
7499  /* PT_NULL procedures have no frame of their own and we only allow
7500     elimination to the stack pointer, which coincides with the argument
7501     pointer; the soft frame pointer resolves there too, so the offset is 0.  */
7502
7503  if (alpha_procedure_type == PT_NULL)
7504    return 0;
7505
7506  /* For a PT_STACK procedure the frame layout looks as follows
7507
7508                      -----> decreasing addresses
7509
7510		   <             size rounded up to 16       |   likewise   >
7511     --------------#------------------------------+++--------------+++-------#
7512     incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7513     --------------#---------------------------------------------------------#
7514                                   ^         ^              ^               ^
7515			      ARG_PTR FRAME_PTR HARD_FRAME_PTR       STACK_PTR
7516
7517
7518     PT_REGISTER procedures are similar in that they may have a frame of their
7519     own. They have no regs-sa/pv/outgoing-args area.
7520
7521     We first compute offset to HARD_FRAME_PTR, then add what we need to get
7522     to STACK_PTR if need be.  */
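  /* For example, in a PT_STACK procedure with sa_size == 40 the PV slot
     adds 8, so FRAME_POINTER_REGNUM eliminates to HARD_FRAME_PTR at an
     offset of 48; eliminating to STACK_PTR further adds the rounded
     outgoing argument area.  */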
7523
7524  {
7525    HOST_WIDE_INT offset;
7526    HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7527
7528    switch (from)
7529      {
7530      case FRAME_POINTER_REGNUM:
7531	offset = ALPHA_ROUND (sa_size + pv_save_size);
7532	break;
7533      case ARG_POINTER_REGNUM:
7534	offset = (ALPHA_ROUND (sa_size + pv_save_size
7535			       + get_frame_size ()
7536			       + crtl->args.pretend_args_size)
7537		  - crtl->args.pretend_args_size);
7538	break;
7539      default:
7540	gcc_unreachable ();
7541      }
7542
7543    if (to == STACK_POINTER_REGNUM)
7544      offset += ALPHA_ROUND (crtl->outgoing_args_size);
7545
7546    return offset;
7547  }
7548}
7549
7550#define COMMON_OBJECT "common_object"
7551
7552static tree
7553common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7554		       tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7555		       bool *no_add_attrs ATTRIBUTE_UNUSED)
7556{
7557  tree decl = *node;
7558  gcc_assert (DECL_P (decl));
7559
7560  DECL_COMMON (decl) = 1;
7561  return NULL_TREE;
7562}
7563
7564static const struct attribute_spec vms_attribute_table[] =
7565{
7566  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7567       affects_type_identity } */
7568  { COMMON_OBJECT,   0, 1, true,  false, false, common_object_handler, false },
7569  { NULL,            0, 0, false, false, false, NULL, false }
7570};
7571
7572void
7573vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
7574			       unsigned HOST_WIDE_INT size,
7575			       unsigned int align)
7576{
7577  tree attr = DECL_ATTRIBUTES (decl);
7578  fprintf (file, "%s", COMMON_ASM_OP);
7579  assemble_name (file, name);
7580  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7581  /* ??? Unlike on OSF/1, the alignment factor is not in log units.  */
7582  fprintf (file, ",%u", align / BITS_PER_UNIT);
7583  if (attr)
7584    {
7585      attr = lookup_attribute (COMMON_OBJECT, attr);
7586      if (attr)
7587        fprintf (file, ",%s",
7588		 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7589    }
7590  fputc ('\n', file);
7591}
7592
7593#undef COMMON_OBJECT
7594
7595#endif
7596
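/* Return true if the pattern of INSN contains a LO_SUM whose first operand
   is the GP register (pic_offset_table_rtx).  */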
7597bool
7598alpha_find_lo_sum_using_gp (rtx insn)
7599{
7600  subrtx_iterator::array_type array;
7601  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7602    {
7603      const_rtx x = *iter;
7604      if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7605	return true;
7606    }
7607  return false;
7608}
7609
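/* Return nonzero if the current function must load its GP register on
   entry; this is only meaningful for the OSF ABI, where the GP is variable.  */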
7610static int
7611alpha_does_function_need_gp (void)
7612{
7613  rtx_insn *insn;
7614
7615  /* The GP being variable is an OSF ABI thing.  */
7616  if (! TARGET_ABI_OSF)
7617    return 0;
7618
7619  /* We need the gp to load the address of __mcount.  */
7620  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7621    return 1;
7622
7623  /* The code emitted by alpha_output_mi_thunk_osf uses the gp.  */
7624  if (cfun->is_thunk)
7625    return 1;
7626
7627  /* The nonlocal receiver pattern assumes that the gp is valid for
7628     the nested function.  Reasonable because it's almost always set
7629     correctly already.  For the cases where that's wrong, make sure
7630     the nested function loads its gp on entry.  */
7631  if (crtl->has_nonlocal_goto)
7632    return 1;
7633
7634  /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7635     Even if we are a static function, we still need to do this in case
7636     our address is taken and passed to something like qsort.  */
7637
7638  push_topmost_sequence ();
7639  insn = get_insns ();
7640  pop_topmost_sequence ();
7641
7642  for (; insn; insn = NEXT_INSN (insn))
7643    if (NONDEBUG_INSN_P (insn)
7644	&& GET_CODE (PATTERN (insn)) != USE
7645	&& GET_CODE (PATTERN (insn)) != CLOBBER
7646	&& get_attr_usegp (insn))
7647      return 1;
7648
7649  return 0;
7650}
7651
7652
7653/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7654   sequences.  */
7655
7656static rtx_insn *
7657set_frame_related_p (void)
7658{
7659  rtx_insn *seq = get_insns ();
7660  rtx_insn *insn;
7661
7662  end_sequence ();
7663
7664  if (!seq)
7665    return NULL;
7666
7667  if (INSN_P (seq))
7668    {
7669      insn = seq;
7670      while (insn != NULL_RTX)
7671	{
7672	  RTX_FRAME_RELATED_P (insn) = 1;
7673	  insn = NEXT_INSN (insn);
7674	}
7675      seq = emit_insn (seq);
7676    }
7677  else
7678    {
7679      seq = emit_insn (seq);
7680      RTX_FRAME_RELATED_P (seq) = 1;
7681    }
7682  return seq;
7683}
7684
7685#define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
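/* For example, FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx))
   emits the move inside a sequence and marks every insn produced as
   RTX_FRAME_RELATED_P, so that unwind info is generated for it.  */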
7686
7687/* Generates a store with the proper unwind info attached.  VALUE is
7688   stored at BASE_REG+BASE_OFS.  If FRAME_BIAS is nonzero, then BASE_REG
7689   contains SP+FRAME_BIAS, and that is the unwind info that should be
7690   generated.  If FRAME_REG != VALUE, then VALUE is being stored on
7691   behalf of FRAME_REG, and FRAME_REG should be present in the unwind.  */
7692
7693static void
7694emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7695		    HOST_WIDE_INT base_ofs, rtx frame_reg)
7696{
7697  rtx addr, mem;
7698  rtx_insn *insn;
7699
7700  addr = plus_constant (Pmode, base_reg, base_ofs);
7701  mem = gen_frame_mem (DImode, addr);
7702
7703  insn = emit_move_insn (mem, value);
7704  RTX_FRAME_RELATED_P (insn) = 1;
7705
7706  if (frame_bias || value != frame_reg)
7707    {
7708      if (frame_bias)
7709	{
7710	  addr = plus_constant (Pmode, stack_pointer_rtx,
7711			        frame_bias + base_ofs);
7712	  mem = gen_rtx_MEM (DImode, addr);
7713	}
7714
7715      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7716		    gen_rtx_SET (VOIDmode, mem, frame_reg));
7717    }
7718}
7719
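/* Convenience wrapper around emit_frame_store_1 for the common case of
   saving hard register REGNO on its own behalf.  */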
7720static void
7721emit_frame_store (unsigned int regno, rtx base_reg,
7722		  HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7723{
7724  rtx reg = gen_rtx_REG (DImode, regno);
7725  emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7726}
7727
7728/* Compute the frame size.  SIZE is the size of the "naked" frame
7729   and SA_SIZE is the size of the register save area.  */
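/* For example, on OSF with 40 bytes of locals, a 16-byte register save
   area and 64 bytes of outgoing arguments, the frame is
   64 + 16 + ALPHA_ROUND (40) == 128 bytes, ALPHA_ROUND keeping the frame
   a multiple of the 16-byte stack alignment.  */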
7730
7731static HOST_WIDE_INT
7732compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7733{
7734  if (TARGET_ABI_OPEN_VMS)
7735    return ALPHA_ROUND (sa_size
7736			+ (alpha_procedure_type == PT_STACK ? 8 : 0)
7737			+ size
7738			+ crtl->args.pretend_args_size);
7739  else
7740    return ALPHA_ROUND (crtl->outgoing_args_size)
7741	   + sa_size
7742	   + ALPHA_ROUND (size
7743			  + crtl->args.pretend_args_size);
7744}
7745
7746/* Write function prologue.  */
7747
7748/* On VMS we have two kinds of functions:
7749
7750   - stack frame (PROC_STACK)
7751	these are 'normal' functions with local variables and which
7752	call other functions
7753   - register frame (PROC_REGISTER)
7754	keeps all data in registers, needs no stack
7755
7756   We must pass this to the assembler so it can generate the
7757   proper pdsc (procedure descriptor).
7758   This is done with the '.pdesc' directive.
7759
7760   On non-VMS targets we don't really differentiate between the two, as we
7761   can simply allocate stack without saving registers.  */
7762
7763void
7764alpha_expand_prologue (void)
7765{
7766  /* Registers to save.  */
7767  unsigned long imask = 0;
7768  unsigned long fmask = 0;
7769  /* Stack space needed for pushing registers clobbered by us.  */
7770  HOST_WIDE_INT sa_size, sa_bias;
7771  /* Complete stack size needed.  */
7772  HOST_WIDE_INT frame_size;
7773  /* Probed stack size; it additionally includes the size of
7774     the "reserve region" if any.  */
7775  HOST_WIDE_INT probed_size;
7776  /* Offset from base reg to register save area.  */
7777  HOST_WIDE_INT reg_offset;
7778  rtx sa_reg;
7779  int i;
7780
7781  sa_size = alpha_sa_size ();
7782  frame_size = compute_frame_size (get_frame_size (), sa_size);
7783
7784  if (flag_stack_usage_info)
7785    current_function_static_stack_size = frame_size;
7786
7787  if (TARGET_ABI_OPEN_VMS)
7788    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7789  else
7790    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7791
7792  alpha_sa_mask (&imask, &fmask);
7793
7794  /* Emit an insn to reload GP, if needed.  */
7795  if (TARGET_ABI_OSF)
7796    {
7797      alpha_function_needs_gp = alpha_does_function_need_gp ();
7798      if (alpha_function_needs_gp)
7799	emit_insn (gen_prologue_ldgp ());
7800    }
7801
7802  /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7803     the call to mcount ourselves, rather than having the linker do it
7804     magically in response to -pg.  Since _mcount has special linkage,
7805     don't represent the call as a call.  */
7806  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7807    emit_insn (gen_prologue_mcount ());
7808
7809  /* Adjust the stack by the frame size.  If the frame size is > 4096
7810     bytes, we need to be sure we probe somewhere in the first and last
7811     4096 bytes (we can probably get away without the latter test) and
7812     every 8192 bytes in between.  If the frame size is > 32768, we
7813     do this in a loop.  Otherwise, we generate the explicit probe
7814     instructions.
7815
7816     Note that we are only allowed to adjust sp once in the prologue.  */
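  /* For example, without -fstack-check, a 20000-byte frame with no saved
     registers gets explicit probes at sp-4096 and sp-12288 from the loop
     below, one final probe at sp-20000, and then the single 20000-byte
     sp adjustment.  */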
7817
7818  probed_size = frame_size;
7819  if (flag_stack_check)
7820    probed_size += STACK_CHECK_PROTECT;
7821
7822  if (probed_size <= 32768)
7823    {
7824      if (probed_size > 4096)
7825	{
7826	  int probed;
7827
7828	  for (probed = 4096; probed < probed_size; probed += 8192)
7829	    emit_insn (gen_probe_stack (GEN_INT (-probed)));
7830
7831	  /* We only have to do this probe if we aren't saving registers or
7832	     if we are probing beyond the frame because of -fstack-check.  */
7833	  if ((sa_size == 0 && probed_size > probed - 4096)
7834	      || flag_stack_check)
7835	    emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7836	}
7837
7838      if (frame_size != 0)
7839	FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7840				    GEN_INT (-frame_size))));
7841    }
7842  else
7843    {
7844      /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7845	 number of 8192 byte blocks to probe.  We then probe each block
7846	 in the loop and then set SP to the proper location.  If the
7847	 amount remaining is > 4096, we have to do one more probe if we
7848	 are not saving any registers or if we are probing beyond the
7849	 frame because of -fstack-check.  */
7850
7851      HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7852      HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7853      rtx ptr = gen_rtx_REG (DImode, 22);
7854      rtx count = gen_rtx_REG (DImode, 23);
7855      rtx seq;
7856
7857      emit_move_insn (count, GEN_INT (blocks));
7858      emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7859
7860      /* Because of the difficulty in emitting a new basic block this
7861	 late in the compilation, generate the loop as a single insn.  */
7862      emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7863
7864      if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7865	{
7866	  rtx last = gen_rtx_MEM (DImode,
7867				  plus_constant (Pmode, ptr, -leftover));
7868	  MEM_VOLATILE_P (last) = 1;
7869	  emit_move_insn (last, const0_rtx);
7870	}
7871
7872      if (flag_stack_check)
7873	{
7874	  /* If -fstack-check is specified we have to load the entire
7875	     constant into a register and subtract from the sp in one go,
7876	     because the probed stack size is not equal to the frame size.  */
7877	  HOST_WIDE_INT lo, hi;
7878	  lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7879	  hi = frame_size - lo;
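	  /* The xor/subtract above sign-extends the low 16 bits, so LO
	     always fits an lda displacement and HI absorbs the rest.  */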
7880
7881	  emit_move_insn (ptr, GEN_INT (hi));
7882	  emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7883	  seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7884				       ptr));
7885	}
7886      else
7887	{
7888	  seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7889				       GEN_INT (-leftover)));
7890	}
7891
7892      /* This alternative is special, because the DWARF code cannot
7893         possibly intuit through the loop above.  So we invent this
7894         note for it to look at instead.  */
7895      RTX_FRAME_RELATED_P (seq) = 1;
7896      add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7897		    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7898				 plus_constant (Pmode, stack_pointer_rtx,
7899						-frame_size)));
7900    }
7901
7902  /* Cope with very large offsets to the register save area.  */
7903  sa_bias = 0;
7904  sa_reg = stack_pointer_rtx;
7905  if (reg_offset + sa_size > 0x8000)
7906    {
7907      int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7908      rtx sa_bias_rtx;
7909
7910      if (low + sa_size <= 0x8000)
7911	sa_bias = reg_offset - low, reg_offset = low;
7912      else
7913	sa_bias = reg_offset, reg_offset = 0;
7914
7915      sa_reg = gen_rtx_REG (DImode, 24);
7916      sa_bias_rtx = GEN_INT (sa_bias);
7917
7918      if (add_operand (sa_bias_rtx, DImode))
7919	emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7920      else
7921	{
7922	  emit_move_insn (sa_reg, sa_bias_rtx);
7923	  emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7924	}
7925    }
7926
7927  /* Save registers in stack order, beginning with the VMS PV.  */
7928  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7929    emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7930
7931  /* Save register RA next.  */
7932  if (imask & (1UL << REG_RA))
7933    {
7934      emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7935      imask &= ~(1UL << REG_RA);
7936      reg_offset += 8;
7937    }
7938
7939  /* Now save any other registers required to be saved.  */
7940  for (i = 0; i < 31; i++)
7941    if (imask & (1UL << i))
7942      {
7943	emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7944	reg_offset += 8;
7945      }
7946
7947  for (i = 0; i < 31; i++)
7948    if (fmask & (1UL << i))
7949      {
7950	emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7951	reg_offset += 8;
7952      }
7953
7954  if (TARGET_ABI_OPEN_VMS)
7955    {
7956      /* Register frame procedures save the fp.  */
7957      if (alpha_procedure_type == PT_REGISTER)
7958	{
7959	  rtx_insn *insn =
7960	    emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7961			    hard_frame_pointer_rtx);
7962	  add_reg_note (insn, REG_CFA_REGISTER, NULL);
7963	  RTX_FRAME_RELATED_P (insn) = 1;
7964	}
7965
7966      if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7967	emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7968				    gen_rtx_REG (DImode, REG_PV)));
7969
7970      if (alpha_procedure_type != PT_NULL
7971	  && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7972	FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7973
7974      /* If we have to allocate space for outgoing args, do it now.  */
7975      if (crtl->outgoing_args_size != 0)
7976	{
7977	  rtx_insn *seq
7978	    = emit_move_insn (stack_pointer_rtx,
7979			      plus_constant
7980			      (Pmode, hard_frame_pointer_rtx,
7981			       - (ALPHA_ROUND
7982				  (crtl->outgoing_args_size))));
7983
7984	  /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7985	     if ! frame_pointer_needed. Setting the bit will change the CFA
7986	     computation rule to use sp again, which would be wrong if we had
7987	     frame_pointer_needed, as this means sp might move unpredictably
7988	     later on.
7989
7990	     Also, note that
7991	       frame_pointer_needed
7992	       => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7993	     and
7994	       crtl->outgoing_args_size != 0
7995	       => alpha_procedure_type != PT_NULL,
7996
7997	     so when we are not setting the bit here, we are guaranteed to
7998	     have emitted an FRP frame pointer update just before.  */
7999	  RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
8000	}
8001    }
8002  else
8003    {
8004      /* If we need a frame pointer, set it from the stack pointer.  */
8005      if (frame_pointer_needed)
8006	{
8007	  if (TARGET_CAN_FAULT_IN_PROLOGUE)
8008	    FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
8009	  else
8010	    /* This must always be the last instruction in the
8011	       prologue, thus we emit a special move + clobber.  */
8012	      FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
8013				           stack_pointer_rtx, sa_reg)));
8014	}
8015    }
8016
8017  /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
8018     the prologue, for exception handling reasons, we cannot do this for
8019     any insn that might fault.  We could prevent this for mems with a
8020     (clobber:BLK (scratch)), but this doesn't work for fp insns.  So we
8021     have to prevent all such scheduling with a blockage.
8022
8023     Linux, on the other hand, never bothered to implement OSF/1's
8024     exception handling, and so doesn't care about such things.  Anyone
8025     planning to use dwarf2 frame-unwind info can also omit the blockage.  */
8026
8027  if (! TARGET_CAN_FAULT_IN_PROLOGUE)
8028    emit_insn (gen_blockage ());
8029}
8030
8031/* Count the number of .file directives, so that .loc is up to date.  */
8032int num_source_filenames = 0;
8033
8034/* Output the textual info surrounding the prologue.  */
8035
8036void
8037alpha_start_function (FILE *file, const char *fnname,
8038		      tree decl ATTRIBUTE_UNUSED)
8039{
8040  unsigned long imask = 0;
8041  unsigned long fmask = 0;
8042  /* Stack space needed for pushing registers clobbered by us.  */
8043  HOST_WIDE_INT sa_size;
8044  /* Complete stack size needed.  */
8045  unsigned HOST_WIDE_INT frame_size;
8046  /* The maximum debuggable frame size.  */
8047  unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
8048  /* Offset from base reg to register save area.  */
8049  HOST_WIDE_INT reg_offset;
8050  char *entry_label = (char *) alloca (strlen (fnname) + 6);
8051  char *tramp_label = (char *) alloca (strlen (fnname) + 6);
8052  int i;
8053
8054#if TARGET_ABI_OPEN_VMS
8055  vms_start_function (fnname);
8056#endif
8057
8058  alpha_fnname = fnname;
8059  sa_size = alpha_sa_size ();
8060  frame_size = compute_frame_size (get_frame_size (), sa_size);
8061
8062  if (TARGET_ABI_OPEN_VMS)
8063    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8064  else
8065    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8066
8067  alpha_sa_mask (&imask, &fmask);
8068
8069  /* Issue function start and label.  */
8070  if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8071    {
8072      fputs ("\t.ent ", file);
8073      assemble_name (file, fnname);
8074      putc ('\n', file);
8075
8076      /* If the function needs GP, we'll write the "..ng" label there.
8077	 Otherwise, do it here.  */
8078      if (TARGET_ABI_OSF
8079          && ! alpha_function_needs_gp
8080	  && ! cfun->is_thunk)
8081	{
8082	  putc ('$', file);
8083	  assemble_name (file, fnname);
8084	  fputs ("..ng:\n", file);
8085	}
8086    }
8087  /* Nested functions on VMS that are potentially called via trampoline
8088     get a special transfer entry point that loads the called function's
8089     procedure descriptor and static chain.  */
8090   if (TARGET_ABI_OPEN_VMS
8091       && !TREE_PUBLIC (decl)
8092       && DECL_CONTEXT (decl)
8093       && !TYPE_P (DECL_CONTEXT (decl))
8094       && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8095     {
8096	strcpy (tramp_label, fnname);
8097	strcat (tramp_label, "..tr");
8098	ASM_OUTPUT_LABEL (file, tramp_label);
8099	fprintf (file, "\tldq $1,24($27)\n");
8100	fprintf (file, "\tldq $27,16($27)\n");
8101     }
8102
8103  strcpy (entry_label, fnname);
8104  if (TARGET_ABI_OPEN_VMS)
8105    strcat (entry_label, "..en");
8106
8107  ASM_OUTPUT_LABEL (file, entry_label);
8108  inside_function = TRUE;
8109
8110  if (TARGET_ABI_OPEN_VMS)
8111    fprintf (file, "\t.base $%d\n", vms_base_regno);
8112
8113  if (TARGET_ABI_OSF
8114      && TARGET_IEEE_CONFORMANT
8115      && !flag_inhibit_size_directive)
8116    {
8117      /* Set flags in procedure descriptor to request IEEE-conformant
8118	 math-library routines.  The value we set it to is PDSC_EXC_IEEE
8119	 (/usr/include/pdsc.h).  */
8120      fputs ("\t.eflag 48\n", file);
8121    }
8122
8123  /* Set up offsets to alpha virtual arg/local debugging pointer.  */
8124  alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8125  alpha_arg_offset = -frame_size + 48;
8126
8127  /* Describe our frame.  If the frame size is larger than the maximum
8128     debuggable frame size, print it as zero to avoid an assembler error.
8129     We won't be properly describing such a frame, but that's the best we can do.  */
8130  if (TARGET_ABI_OPEN_VMS)
8131    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8132	     HOST_WIDE_INT_PRINT_DEC "\n",
8133	     vms_unwind_regno,
8134	     frame_size >= (1UL << 31) ? 0 : frame_size,
8135	     reg_offset);
8136  else if (!flag_inhibit_size_directive)
8137    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8138	     (frame_pointer_needed
8139	      ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8140	     frame_size >= max_frame_size ? 0 : frame_size,
8141	     crtl->args.pretend_args_size);
8142
8143  /* Describe which registers were spilled.  */
8144  if (TARGET_ABI_OPEN_VMS)
8145    {
8146      if (imask)
8147        /* ??? Does VMS care if mask contains ra?  The old code didn't
8148           set it, so I don't set it here.  */
8149	fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8150      if (fmask)
8151	fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8152      if (alpha_procedure_type == PT_REGISTER)
8153	fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8154    }
8155  else if (!flag_inhibit_size_directive)
8156    {
8157      if (imask)
8158	{
8159	  fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8160		   frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8161
8162	  for (i = 0; i < 32; ++i)
8163	    if (imask & (1UL << i))
8164	      reg_offset += 8;
8165	}
8166
8167      if (fmask)
8168	fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8169		 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8170    }
8171
8172#if TARGET_ABI_OPEN_VMS
8173  /* If a user condition handler has been installed at some point, emit
8174     the procedure descriptor bits to point the Condition Handling Facility
8175     at the indirection wrapper, and state the fp offset at which the user
8176     handler may be found.  */
8177  if (cfun->machine->uses_condition_handler)
8178    {
8179      fprintf (file, "\t.handler __gcc_shell_handler\n");
8180      fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8181    }
8182
8183#ifdef TARGET_VMS_CRASH_DEBUG
8184  /* Support of minimal traceback info.  */
8185  switch_to_section (readonly_data_section);
8186  fprintf (file, "\t.align 3\n");
8187  assemble_name (file, fnname); fputs ("..na:\n", file);
8188  fputs ("\t.ascii \"", file);
8189  assemble_name (file, fnname);
8190  fputs ("\\0\"\n", file);
8191  switch_to_section (text_section);
8192#endif
8193#endif /* TARGET_ABI_OPEN_VMS */
8194}
8195
8196/* Emit the .prologue note at the scheduled end of the prologue.  */
8197
8198static void
8199alpha_output_function_end_prologue (FILE *file)
8200{
8201  if (TARGET_ABI_OPEN_VMS)
8202    fputs ("\t.prologue\n", file);
8203  else if (!flag_inhibit_size_directive)
8204    fprintf (file, "\t.prologue %d\n",
8205	     alpha_function_needs_gp || cfun->is_thunk);
8206}
8207
8208/* Write function epilogue.  */
8209
8210void
8211alpha_expand_epilogue (void)
8212{
8213  /* Registers to save.  */
8214  unsigned long imask = 0;
8215  unsigned long fmask = 0;
8216  /* Stack space needed for pushing registers clobbered by us.  */
8217  HOST_WIDE_INT sa_size;
8218  /* Complete stack size needed.  */
8219  HOST_WIDE_INT frame_size;
8220  /* Offset from base reg to register save area.  */
8221  HOST_WIDE_INT reg_offset;
8222  int fp_is_frame_pointer, fp_offset;
8223  rtx sa_reg, sa_reg_exp = NULL;
8224  rtx sp_adj1, sp_adj2, mem, reg, insn;
8225  rtx eh_ofs;
8226  rtx cfa_restores = NULL_RTX;
8227  int i;
8228
8229  sa_size = alpha_sa_size ();
8230  frame_size = compute_frame_size (get_frame_size (), sa_size);
8231
8232  if (TARGET_ABI_OPEN_VMS)
8233    {
8234       if (alpha_procedure_type == PT_STACK)
8235          reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8236       else
8237          reg_offset = 0;
8238    }
8239  else
8240    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8241
8242  alpha_sa_mask (&imask, &fmask);
8243
8244  fp_is_frame_pointer
8245    = (TARGET_ABI_OPEN_VMS
8246       ? alpha_procedure_type == PT_STACK
8247       : frame_pointer_needed);
8248  fp_offset = 0;
8249  sa_reg = stack_pointer_rtx;
8250
8251  if (crtl->calls_eh_return)
8252    eh_ofs = EH_RETURN_STACKADJ_RTX;
8253  else
8254    eh_ofs = NULL_RTX;
8255
8256  if (sa_size)
8257    {
8258      /* If we have a frame pointer, restore SP from it.  */
8259      if (TARGET_ABI_OPEN_VMS
8260	  ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8261	  : frame_pointer_needed)
8262	emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8263
8264      /* Cope with very large offsets to the register save area.  */
8265      if (reg_offset + sa_size > 0x8000)
8266	{
8267	  int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8268	  HOST_WIDE_INT bias;
8269
8270	  if (low + sa_size <= 0x8000)
8271	    bias = reg_offset - low, reg_offset = low;
8272	  else
8273	    bias = reg_offset, reg_offset = 0;
8274
8275	  sa_reg = gen_rtx_REG (DImode, 22);
8276	  sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8277
8278	  emit_move_insn (sa_reg, sa_reg_exp);
8279	}
8280
8281      /* Restore registers in order, excepting a true frame pointer.  */
8282
8283      mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8284      reg = gen_rtx_REG (DImode, REG_RA);
8285      emit_move_insn (reg, mem);
8286      cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8287
8288      reg_offset += 8;
8289      imask &= ~(1UL << REG_RA);
8290
8291      for (i = 0; i < 31; ++i)
8292	if (imask & (1UL << i))
8293	  {
8294	    if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8295	      fp_offset = reg_offset;
8296	    else
8297	      {
8298		mem = gen_frame_mem (DImode,
8299				     plus_constant (Pmode, sa_reg,
8300						    reg_offset));
8301		reg = gen_rtx_REG (DImode, i);
8302		emit_move_insn (reg, mem);
8303		cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8304					       cfa_restores);
8305	      }
8306	    reg_offset += 8;
8307	  }
8308
8309      for (i = 0; i < 31; ++i)
8310	if (fmask & (1UL << i))
8311	  {
8312	    mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8313						        reg_offset));
8314	    reg = gen_rtx_REG (DFmode, i+32);
8315	    emit_move_insn (reg, mem);
8316	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8317	    reg_offset += 8;
8318	  }
8319    }
8320
8321  if (frame_size || eh_ofs)
8322    {
8323      sp_adj1 = stack_pointer_rtx;
8324
8325      if (eh_ofs)
8326	{
8327	  sp_adj1 = gen_rtx_REG (DImode, 23);
8328	  emit_move_insn (sp_adj1,
8329			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8330	}
8331
8332      /* If the stack size is large, begin computation into a temporary
8333	 register so as not to interfere with a potential fp restore,
8334	 which must be consecutive with an SP restore.  */
8335      if (frame_size < 32768 && !cfun->calls_alloca)
8336	sp_adj2 = GEN_INT (frame_size);
8337      else if (frame_size < 0x40007fffL)
8338	{
8339	  int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8340
8341	  sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8342	  if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8343	    sp_adj1 = sa_reg;
8344	  else
8345	    {
8346	      sp_adj1 = gen_rtx_REG (DImode, 23);
8347	      emit_move_insn (sp_adj1, sp_adj2);
8348	    }
8349	  sp_adj2 = GEN_INT (low);
8350	}
8351      else
8352	{
8353	  rtx tmp = gen_rtx_REG (DImode, 23);
8354	  sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8355	  if (!sp_adj2)
8356	    {
8357	      /* We can't drop new things to memory this late, afaik,
8358		 so build it up by pieces.  */
8359	      sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8360						   -(frame_size < 0));
8361	      gcc_assert (sp_adj2);
8362	    }
8363	}
8364
8365      /* From now on, things must be in order.  So emit blockages.  */
8366
8367      /* Restore the frame pointer.  */
8368      if (fp_is_frame_pointer)
8369	{
8370	  emit_insn (gen_blockage ());
8371	  mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8372						      fp_offset));
8373	  emit_move_insn (hard_frame_pointer_rtx, mem);
8374	  cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8375					 hard_frame_pointer_rtx, cfa_restores);
8376	}
8377      else if (TARGET_ABI_OPEN_VMS)
8378	{
8379	  emit_insn (gen_blockage ());
8380	  emit_move_insn (hard_frame_pointer_rtx,
8381			  gen_rtx_REG (DImode, vms_save_fp_regno));
8382	  cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8383					 hard_frame_pointer_rtx, cfa_restores);
8384	}
8385
8386      /* Restore the stack pointer.  */
8387      emit_insn (gen_blockage ());
8388      if (sp_adj2 == const0_rtx)
8389	insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8390      else
8391	insn = emit_move_insn (stack_pointer_rtx,
8392			       gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8393      REG_NOTES (insn) = cfa_restores;
8394      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8395      RTX_FRAME_RELATED_P (insn) = 1;
8396    }
8397  else
8398    {
8399      gcc_assert (cfa_restores == NULL);
8400
8401      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8402        {
8403          emit_insn (gen_blockage ());
8404          insn = emit_move_insn (hard_frame_pointer_rtx,
8405				 gen_rtx_REG (DImode, vms_save_fp_regno));
8406	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8407	  RTX_FRAME_RELATED_P (insn) = 1;
8408        }
8409    }
8410}
8411
8412/* Output the rest of the textual info surrounding the epilogue.  */
8413
8414void
8415alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8416{
8417  rtx_insn *insn;
8418
8419  /* We output a nop after noreturn calls at the very end of the function to
8420     ensure that the return address always remains in the caller's code range,
8421     as not doing so might confuse unwinding engines.  */
8422  insn = get_last_insn ();
8423  if (!INSN_P (insn))
8424    insn = prev_active_insn (insn);
8425  if (insn && CALL_P (insn))
8426    output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8427
8428#if TARGET_ABI_OPEN_VMS
8429  /* Write the linkage entries.  */
8430  alpha_write_linkage (file, fnname);
8431#endif
8432
8433  /* End the function.  */
8434  if (TARGET_ABI_OPEN_VMS
8435      || !flag_inhibit_size_directive)
8436    {
8437      fputs ("\t.end ", file);
8438      assemble_name (file, fnname);
8439      putc ('\n', file);
8440    }
8441  inside_function = FALSE;
8442}
8443
8444#if TARGET_ABI_OSF
8445/* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8446
8447   In order to avoid the hordes of differences between generated code
8448   with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8449   lots of code loading up large constants, generate rtl and emit it
8450   instead of going straight to text.
8451
8452   Not sure why this idea hasn't been explored before...  */
8453
8454static void
8455alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8456			   HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8457			   tree function)
8458{
8459  HOST_WIDE_INT hi, lo;
8460  rtx this_rtx, funexp;
8461  rtx_insn *insn;
8462
8463  /* We always require a valid GP.  */
8464  emit_insn (gen_prologue_ldgp ());
8465  emit_note (NOTE_INSN_PROLOGUE_END);
8466
8467  /* Find the "this" pointer.  If the function returns a structure,
8468     the structure return pointer is in $16.  */
8469  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8470    this_rtx = gen_rtx_REG (Pmode, 17);
8471  else
8472    this_rtx = gen_rtx_REG (Pmode, 16);
8473
8474  /* Add DELTA.  When possible we use ldah+lda.  Otherwise load the
8475     entire constant for the add.  */
8476  lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8477  hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
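  /* For example, DELTA == 0x18008 splits into HI == 0x20000 and
     LO == -0x7ff8; HI + LO == DELTA and each part fits its insn.  */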
8478  if (hi + lo == delta)
8479    {
8480      if (hi)
8481	emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8482      if (lo)
8483	emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8484    }
8485  else
8486    {
8487      rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8488					   delta, -(delta < 0));
8489      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8490    }
8491
8492  /* Add a delta stored in the vtable at VCALL_OFFSET.  */
8493  if (vcall_offset)
8494    {
8495      rtx tmp, tmp2;
8496
8497      tmp = gen_rtx_REG (Pmode, 0);
8498      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8499
8500      lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8501      hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8502      if (hi + lo == vcall_offset)
8503	{
8504	  if (hi)
8505	    emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8506	}
8507      else
8508	{
8509	  tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8510					    vcall_offset, -(vcall_offset < 0));
8511          emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8512	  lo = 0;
8513	}
8514      if (lo)
8515	tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8516      else
8517	tmp2 = tmp;
8518      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8519
8520      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8521    }
8522
8523  /* Generate a tail call to the target function.  */
8524  if (! TREE_USED (function))
8525    {
8526      assemble_external (function);
8527      TREE_USED (function) = 1;
8528    }
8529  funexp = XEXP (DECL_RTL (function), 0);
8530  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8531  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8532  SIBLING_CALL_P (insn) = 1;
8533
8534  /* Run just enough of rest_of_compilation to get the insns emitted.
8535     There's not really enough bulk here to make other passes such as
8536     instruction scheduling worthwhile.  Note that use_thunk calls
8537     assemble_start_function and assemble_end_function.  */
8538  insn = get_insns ();
8539  shorten_branches (insn);
8540  final_start_function (insn, file, 1);
8541  final (insn, file, 1);
8542  final_end_function ();
8543}
8544#endif /* TARGET_ABI_OSF */
8545
8546/* Debugging support.  */
8547
8548#include "gstab.h"
8549
8550/* Name of the file containing the current function.  */
8551
8552static const char *current_function_file = "";
8553
8554/* Offsets to alpha virtual arg/local debugging pointers.  */
8555
8556long alpha_arg_offset;
8557long alpha_auto_offset;
8558
8559/* Emit a new filename to a stream.  */
8560
8561void
8562alpha_output_filename (FILE *stream, const char *name)
8563{
8564  static int first_time = TRUE;
8565
8566  if (first_time)
8567    {
8568      first_time = FALSE;
8569      ++num_source_filenames;
8570      current_function_file = name;
8571      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8572      output_quoted_string (stream, name);
8573      fprintf (stream, "\n");
8574    }
8575
8576  else if (name != current_function_file
8577	   && strcmp (name, current_function_file) != 0)
8578    {
8579      ++num_source_filenames;
8580      current_function_file = name;
8581      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8582
8583      output_quoted_string (stream, name);
8584      fprintf (stream, "\n");
8585    }
8586}
8587
8588/* Structure to show the current status of registers and memory.  */
8589
8590struct shadow_summary
8591{
8592  struct {
8593    unsigned int i     : 31;	/* Mask of int regs */
8594    unsigned int fp    : 31;	/* Mask of fp regs */
8595    unsigned int mem   :  1;	/* mem == imem | fpmem */
8596  } used, defd;
8597};
8598
8599/* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8600   to the summary structure.  SET is nonzero if the insn is setting the
8601   object, otherwise zero.  */
8602
8603static void
8604summarize_insn (rtx x, struct shadow_summary *sum, int set)
8605{
8606  const char *format_ptr;
8607  int i, j;
8608
8609  if (x == 0)
8610    return;
8611
8612  switch (GET_CODE (x))
8613    {
8614      /* ??? Note that this case would be incorrect if the Alpha had a
8615	 ZERO_EXTRACT in SET_DEST.  */
8616    case SET:
8617      summarize_insn (SET_SRC (x), sum, 0);
8618      summarize_insn (SET_DEST (x), sum, 1);
8619      break;
8620
8621    case CLOBBER:
8622      summarize_insn (XEXP (x, 0), sum, 1);
8623      break;
8624
8625    case USE:
8626      summarize_insn (XEXP (x, 0), sum, 0);
8627      break;
8628
8629    case ASM_OPERANDS:
8630      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8631	summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8632      break;
8633
8634    case PARALLEL:
8635      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8636	summarize_insn (XVECEXP (x, 0, i), sum, 0);
8637      break;
8638
8639    case SUBREG:
8640      summarize_insn (SUBREG_REG (x), sum, 0);
8641      break;
8642
8643    case REG:
8644      {
8645	int regno = REGNO (x);
8646	unsigned long mask = ((unsigned long) 1) << (regno % 32);
8647
8648	if (regno == 31 || regno == 63)
8649	  break;
8650
8651	if (set)
8652	  {
8653	    if (regno < 32)
8654	      sum->defd.i |= mask;
8655	    else
8656	      sum->defd.fp |= mask;
8657	  }
8658	else
8659	  {
8660	    if (regno < 32)
8661	      sum->used.i  |= mask;
8662	    else
8663	      sum->used.fp |= mask;
8664	  }
8665	}
8666      break;
8667
8668    case MEM:
8669      if (set)
8670	sum->defd.mem = 1;
8671      else
8672	sum->used.mem = 1;
8673
8674      /* Find the regs used in memory address computation: */
8675      summarize_insn (XEXP (x, 0), sum, 0);
8676      break;
8677
8678    case CONST_INT:   case CONST_DOUBLE:
8679    case SYMBOL_REF:  case LABEL_REF:     case CONST:
8680    case SCRATCH:     case ASM_INPUT:
8681      break;
8682
8683      /* Handle common unary and binary ops for efficiency.  */
8684    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
8685    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
8686    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
8687    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
8688    case NE:       case EQ:      case GE:      case GT:        case LE:
8689    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
8690      summarize_insn (XEXP (x, 0), sum, 0);
8691      summarize_insn (XEXP (x, 1), sum, 0);
8692      break;
8693
8694    case NEG:  case NOT:  case SIGN_EXTEND:  case ZERO_EXTEND:
8695    case TRUNCATE:  case FLOAT_EXTEND:  case FLOAT_TRUNCATE:  case FLOAT:
8696    case FIX:  case UNSIGNED_FLOAT:  case UNSIGNED_FIX:  case ABS:
8697    case SQRT:  case FFS:
8698      summarize_insn (XEXP (x, 0), sum, 0);
8699      break;
8700
8701    default:
8702      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8703      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8704	switch (format_ptr[i])
8705	  {
8706	  case 'e':
8707	    summarize_insn (XEXP (x, i), sum, 0);
8708	    break;
8709
8710	  case 'E':
8711	    for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8712	      summarize_insn (XVECEXP (x, i, j), sum, 0);
8713	    break;
8714
8715	  case 'i':
8716	    break;
8717
8718	  default:
8719	    gcc_unreachable ();
8720	  }
8721    }
8722}
8723
8724/* Ensure a sufficient number of `trapb' insns are in the code when
8725   the user requests code with a trap precision of functions or
8726   instructions.
8727
8728   In naive mode, when the user requests a trap-precision of
8729   "instruction", a trapb is needed after every instruction that may
8730   generate a trap.  This ensures that the code is resumption safe but
8731   it is also slow.
8732
8733   When optimizations are turned on, we delay issuing a trapb as long
8734   as possible.  In this context, a trap shadow is the sequence of
8735   instructions that starts with a (potentially) trap generating
8736   instruction and extends to the next trapb or call_pal instruction
8737   (but GCC never generates call_pal by itself).  We can delay (and
8738   therefore sometimes omit) a trapb subject to the following
8739   conditions:
8740
8741   (a) On entry to the trap shadow, if any Alpha register or memory
8742   location contains a value that is used as an operand value by some
8743   instruction in the trap shadow (live on entry), then no instruction
8744   in the trap shadow may modify the register or memory location.
8745
8746   (b) Within the trap shadow, the computation of the base register
8747   for a memory load or store instruction may not involve using the
8748   result of an instruction that might generate an UNPREDICTABLE
8749   result.
8750
8751   (c) Within the trap shadow, no register may be used more than once
8752   as a destination register.  (This is to make life easier for the
8753   trap-handler.)
8754
8755   (d) The trap shadow may not include any branch instructions.  */
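/* For example, if a floating-point add opens a shadow and a later insn in
   that shadow overwrites one of the add's input registers, condition (a)
   above is violated, so a trapb must be emitted before that insn.  */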
8756
8757static void
8758alpha_handle_trap_shadows (void)
8759{
8760  struct shadow_summary shadow;
8761  int trap_pending, exception_nesting;
8762  rtx_insn *i, *n;
8763
8764  trap_pending = 0;
8765  exception_nesting = 0;
8766  shadow.used.i = 0;
8767  shadow.used.fp = 0;
8768  shadow.used.mem = 0;
8769  shadow.defd = shadow.used;
8770
8771  for (i = get_insns (); i ; i = NEXT_INSN (i))
8772    {
8773      if (NOTE_P (i))
8774	{
8775	  switch (NOTE_KIND (i))
8776	    {
8777	    case NOTE_INSN_EH_REGION_BEG:
8778	      exception_nesting++;
8779	      if (trap_pending)
8780		goto close_shadow;
8781	      break;
8782
8783	    case NOTE_INSN_EH_REGION_END:
8784	      exception_nesting--;
8785	      if (trap_pending)
8786		goto close_shadow;
8787	      break;
8788
8789	    case NOTE_INSN_EPILOGUE_BEG:
8790	      if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8791		goto close_shadow;
8792	      break;
8793	    }
8794	}
8795      else if (trap_pending)
8796	{
8797	  if (alpha_tp == ALPHA_TP_FUNC)
8798	    {
8799	      if (JUMP_P (i)
8800		  && GET_CODE (PATTERN (i)) == RETURN)
8801		goto close_shadow;
8802	    }
8803	  else if (alpha_tp == ALPHA_TP_INSN)
8804	    {
8805	      if (optimize > 0)
8806		{
8807		  struct shadow_summary sum;
8808
8809		  sum.used.i = 0;
8810		  sum.used.fp = 0;
8811		  sum.used.mem = 0;
8812		  sum.defd = sum.used;
8813
8814		  switch (GET_CODE (i))
8815		    {
8816		    case INSN:
8817		      /* Annoyingly, get_attr_trap will die on these.  */
8818		      if (GET_CODE (PATTERN (i)) == USE
8819			  || GET_CODE (PATTERN (i)) == CLOBBER)
8820			break;
8821
8822		      summarize_insn (PATTERN (i), &sum, 0);
8823
8824		      if ((sum.defd.i & shadow.defd.i)
8825			  || (sum.defd.fp & shadow.defd.fp))
8826			{
8827			  /* (c) would be violated */
8828			  goto close_shadow;
8829			}
8830
8831		      /* Combine shadow with summary of current insn: */
8832		      shadow.used.i   |= sum.used.i;
8833		      shadow.used.fp  |= sum.used.fp;
8834		      shadow.used.mem |= sum.used.mem;
8835		      shadow.defd.i   |= sum.defd.i;
8836		      shadow.defd.fp  |= sum.defd.fp;
8837		      shadow.defd.mem |= sum.defd.mem;
8838
8839		      if ((sum.defd.i & shadow.used.i)
8840			  || (sum.defd.fp & shadow.used.fp)
8841			  || (sum.defd.mem & shadow.used.mem))
8842			{
8843			  /* (a) would be violated (also takes care of (b))  */
8844			  gcc_assert (get_attr_trap (i) != TRAP_YES
8845				      || (!(sum.defd.i & sum.used.i)
8846					  && !(sum.defd.fp & sum.used.fp)));
8847
8848			  goto close_shadow;
8849			}
8850		      break;
8851
8852		    case BARRIER:
8853		      /* __builtin_unreachable can expand to no code at all,
8854			 leaving (barrier) RTXes in the instruction stream.  */
8855		      goto close_shadow_notrapb;
8856
8857		    case JUMP_INSN:
8858		    case CALL_INSN:
8859		    case CODE_LABEL:
8860		      goto close_shadow;
8861
8862		    default:
8863		      gcc_unreachable ();
8864		    }
8865		}
8866	      else
8867		{
8868		close_shadow:
8869		  n = emit_insn_before (gen_trapb (), i);
8870		  PUT_MODE (n, TImode);
8871		  PUT_MODE (i, TImode);
8872		close_shadow_notrapb:
8873		  trap_pending = 0;
8874		  shadow.used.i = 0;
8875		  shadow.used.fp = 0;
8876		  shadow.used.mem = 0;
8877		  shadow.defd = shadow.used;
8878		}
8879	    }
8880	}
8881
8882      if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8883	  && NONJUMP_INSN_P (i)
8884	  && GET_CODE (PATTERN (i)) != USE
8885	  && GET_CODE (PATTERN (i)) != CLOBBER
8886	  && get_attr_trap (i) == TRAP_YES)
8887	{
8888	  if (optimize && !trap_pending)
8889	    summarize_insn (PATTERN (i), &shadow, 0);
8890	  trap_pending = 1;
8891	}
8892    }
8893}
8894
8895/* The Alpha can only issue the instructions of a group simultaneously if
8896   the group is suitably aligned.  This is very processor-specific.  */
8897/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8898   that are marked "fake".  These instructions do not exist on that target,
8899   but it is possible to see these insns with deranged combinations of
8900   command-line options, such as "-mtune=ev4 -mmax".  Instead of aborting,
8901   choose a result at random.  */
8902
8903enum alphaev4_pipe {
8904  EV4_STOP = 0,
8905  EV4_IB0 = 1,
8906  EV4_IB1 = 2,
8907  EV4_IBX = 4
8908};
8909
8910enum alphaev5_pipe {
8911  EV5_STOP = 0,
8912  EV5_NONE = 1,
8913  EV5_E01 = 2,
8914  EV5_E0 = 4,
8915  EV5_E1 = 8,
8916  EV5_FAM = 16,
8917  EV5_FA = 32,
8918  EV5_FM = 64
8919};
8920
8921static enum alphaev4_pipe
8922alphaev4_insn_pipe (rtx_insn *insn)
8923{
8924  if (recog_memoized (insn) < 0)
8925    return EV4_STOP;
8926  if (get_attr_length (insn) != 4)
8927    return EV4_STOP;
8928
8929  switch (get_attr_type (insn))
8930    {
8931    case TYPE_ILD:
8932    case TYPE_LDSYM:
8933    case TYPE_FLD:
8934    case TYPE_LD_L:
8935      return EV4_IBX;
8936
8937    case TYPE_IADD:
8938    case TYPE_ILOG:
8939    case TYPE_ICMOV:
8940    case TYPE_ICMP:
8941    case TYPE_FST:
8942    case TYPE_SHIFT:
8943    case TYPE_IMUL:
8944    case TYPE_FBR:
8945    case TYPE_MVI:		/* fake */
8946      return EV4_IB0;
8947
8948    case TYPE_IST:
8949    case TYPE_MISC:
8950    case TYPE_IBR:
8951    case TYPE_JSR:
8952    case TYPE_CALLPAL:
8953    case TYPE_FCPYS:
8954    case TYPE_FCMOV:
8955    case TYPE_FADD:
8956    case TYPE_FDIV:
8957    case TYPE_FMUL:
8958    case TYPE_ST_C:
8959    case TYPE_MB:
8960    case TYPE_FSQRT:		/* fake */
8961    case TYPE_FTOI:		/* fake */
8962    case TYPE_ITOF:		/* fake */
8963      return EV4_IB1;
8964
8965    default:
8966      gcc_unreachable ();
8967    }
8968}
8969
8970static enum alphaev5_pipe
8971alphaev5_insn_pipe (rtx_insn *insn)
8972{
8973  if (recog_memoized (insn) < 0)
8974    return EV5_STOP;
8975  if (get_attr_length (insn) != 4)
8976    return EV5_STOP;
8977
8978  switch (get_attr_type (insn))
8979    {
8980    case TYPE_ILD:
8981    case TYPE_FLD:
8982    case TYPE_LDSYM:
8983    case TYPE_IADD:
8984    case TYPE_ILOG:
8985    case TYPE_ICMOV:
8986    case TYPE_ICMP:
8987      return EV5_E01;
8988
8989    case TYPE_IST:
8990    case TYPE_FST:
8991    case TYPE_SHIFT:
8992    case TYPE_IMUL:
8993    case TYPE_MISC:
8994    case TYPE_MVI:
8995    case TYPE_LD_L:
8996    case TYPE_ST_C:
8997    case TYPE_MB:
8998    case TYPE_FTOI:		/* fake */
8999    case TYPE_ITOF:		/* fake */
9000      return EV5_E0;
9001
9002    case TYPE_IBR:
9003    case TYPE_JSR:
9004    case TYPE_CALLPAL:
9005      return EV5_E1;
9006
9007    case TYPE_FCPYS:
9008      return EV5_FAM;
9009
9010    case TYPE_FBR:
9011    case TYPE_FCMOV:
9012    case TYPE_FADD:
9013    case TYPE_FDIV:
9014    case TYPE_FSQRT:		/* fake */
9015      return EV5_FA;
9016
9017    case TYPE_FMUL:
9018      return EV5_FM;
9019
9020    default:
9021      gcc_unreachable ();
9022    }
9023}
9024
9025/* IN_USE is a mask of the slots currently filled within the insn group.
9026   The mask bits come from alphaev4_pipe above.  If EV4_IBX is set, then
9027   the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
9028
9029   LEN is, of course, the length of the group in bytes.  */
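/* For example, a load (EV4_IBX) followed by an add (EV4_IB0) forms one
   group: the load first claims IB0 (plus IBX), the add then pushes it
   over into IB1, and the group length is 8 bytes.  */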
9030
9031static rtx_insn *
9032alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
9033{
9034  int len, in_use;
9035
9036  len = in_use = 0;
9037
9038  if (! INSN_P (insn)
9039      || GET_CODE (PATTERN (insn)) == CLOBBER
9040      || GET_CODE (PATTERN (insn)) == USE)
9041    goto next_and_done;
9042
9043  while (1)
9044    {
9045      enum alphaev4_pipe pipe;
9046
9047      pipe = alphaev4_insn_pipe (insn);
9048      switch (pipe)
9049	{
9050	case EV4_STOP:
9051	  /* Force complex instructions to start new groups.  */
9052	  if (in_use)
9053	    goto done;
9054
9055	  /* If this is a completely unrecognized insn, it's an asm.
9056	     We don't know how long it is, so record length as -1 to
9057	     signal a needed realignment.  */
9058	  if (recog_memoized (insn) < 0)
9059	    len = -1;
9060	  else
9061	    len = get_attr_length (insn);
9062	  goto next_and_done;
9063
9064	case EV4_IBX:
9065	  if (in_use & EV4_IB0)
9066	    {
9067	      if (in_use & EV4_IB1)
9068		goto done;
9069	      in_use |= EV4_IB1;
9070	    }
9071	  else
9072	    in_use |= EV4_IB0 | EV4_IBX;
9073	  break;
9074
9075	case EV4_IB0:
9076	  if (in_use & EV4_IB0)
9077	    {
9078	      if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
9079		goto done;
9080	      in_use |= EV4_IB1;
9081	    }
9082	  in_use |= EV4_IB0;
9083	  break;
9084
9085	case EV4_IB1:
9086	  if (in_use & EV4_IB1)
9087	    goto done;
9088	  in_use |= EV4_IB1;
9089	  break;
9090
9091	default:
9092	  gcc_unreachable ();
9093	}
9094      len += 4;
9095
9096      /* Haifa doesn't do well scheduling branches.  */
9097      if (JUMP_P (insn))
9098	goto next_and_done;
9099
9100    next:
9101      insn = next_nonnote_insn (insn);
9102
9103      if (!insn || ! INSN_P (insn))
9104	goto done;
9105
9106      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9107      if (GET_MODE (insn) == TImode)
9108	goto done;
9109
9110      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9111	goto next;
9112    }
9113
9114 next_and_done:
9115  insn = next_nonnote_insn (insn);
9116
9117 done:
9118  *plen = len;
9119  *pin_use = in_use;
9120  return insn;
9121}
9122
9123/* IN_USE is a mask of the slots currently filled within the insn group.
9124   The mask bits come from alphaev5_pipe above.  If EV5_E01 is set, then
9125   the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9126
9127   LEN is, of course, the length of the group in bytes.  */
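
/* Illustration: an ILD (EV5_E01) first marks E0 and E01; a later EV5_E0
   insn then finds E0 busy but, since E01 is set and E1 is still free,
   the earlier insn is assumed swapped into E1 and the group continues.
   The floating-point side is analogous: a second EV5_FAM insn is steered
   into FM once FA is occupied.  */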
9128
9129static rtx_insn *
9130alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9131{
9132  int len, in_use;
9133
9134  len = in_use = 0;
9135
9136  if (! INSN_P (insn)
9137      || GET_CODE (PATTERN (insn)) == CLOBBER
9138      || GET_CODE (PATTERN (insn)) == USE)
9139    goto next_and_done;
9140
9141  while (1)
9142    {
9143      enum alphaev5_pipe pipe;
9144
9145      pipe = alphaev5_insn_pipe (insn);
9146      switch (pipe)
9147	{
9148	case EV5_STOP:
9149	  /* Force complex instructions to start new groups.  */
9150	  if (in_use)
9151	    goto done;
9152
9153	  /* If this is a completely unrecognized insn, it's an asm.
9154	     We don't know how long it is, so record length as -1 to
9155	     signal a needed realignment.  */
9156	  if (recog_memoized (insn) < 0)
9157	    len = -1;
9158	  else
9159	    len = get_attr_length (insn);
9160	  goto next_and_done;
9161
	/* ??? In most of the cases below we would like to assert that
	   these situations never happen, as they would indicate an error
	   either in Haifa or in the scheduling description.  Unfortunately,
	   Haifa never schedules the last instruction of the BB, so we
	   don't have an accurate TI bit to go off of.  */
9167	case EV5_E01:
9168	  if (in_use & EV5_E0)
9169	    {
9170	      if (in_use & EV5_E1)
9171		goto done;
9172	      in_use |= EV5_E1;
9173	    }
9174	  else
9175	    in_use |= EV5_E0 | EV5_E01;
9176	  break;
9177
9178	case EV5_E0:
9179	  if (in_use & EV5_E0)
9180	    {
9181	      if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9182		goto done;
9183	      in_use |= EV5_E1;
9184	    }
9185	  in_use |= EV5_E0;
9186	  break;
9187
9188	case EV5_E1:
9189	  if (in_use & EV5_E1)
9190	    goto done;
9191	  in_use |= EV5_E1;
9192	  break;
9193
9194	case EV5_FAM:
9195	  if (in_use & EV5_FA)
9196	    {
9197	      if (in_use & EV5_FM)
9198		goto done;
9199	      in_use |= EV5_FM;
9200	    }
9201	  else
9202	    in_use |= EV5_FA | EV5_FAM;
9203	  break;
9204
9205	case EV5_FA:
9206	  if (in_use & EV5_FA)
9207	    goto done;
9208	  in_use |= EV5_FA;
9209	  break;
9210
9211	case EV5_FM:
9212	  if (in_use & EV5_FM)
9213	    goto done;
9214	  in_use |= EV5_FM;
9215	  break;
9216
9217	case EV5_NONE:
9218	  break;
9219
9220	default:
9221	  gcc_unreachable ();
9222	}
9223      len += 4;
9224
9225      /* Haifa doesn't do well scheduling branches.  */
9226      /* ??? If this is predicted not-taken, slotting continues, except
9227	 that no more IBR, FBR, or JSR insns may be slotted.  */
9228      if (JUMP_P (insn))
9229	goto next_and_done;
9230
9231    next:
9232      insn = next_nonnote_insn (insn);
9233
9234      if (!insn || ! INSN_P (insn))
9235	goto done;
9236
9237      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9238      if (GET_MODE (insn) == TImode)
9239	goto done;
9240
9241      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9242	goto next;
9243    }
9244
9245 next_and_done:
9246  insn = next_nonnote_insn (insn);
9247
9248 done:
9249  *plen = len;
9250  *pin_use = in_use;
9251  return insn;
9252}
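
/* Pick a nop for the next free EV4 slot recorded in *PIN_USE: an integer
   nop while an integer slot is free, a floating nop for IB1 when FP
   registers are available, and unop as a last resort.  *PIN_USE is
   updated with the slot consumed.  */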
9253
9254static rtx
9255alphaev4_next_nop (int *pin_use)
9256{
9257  int in_use = *pin_use;
9258  rtx nop;
9259
9260  if (!(in_use & EV4_IB0))
9261    {
9262      in_use |= EV4_IB0;
9263      nop = gen_nop ();
9264    }
9265  else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9266    {
9267      in_use |= EV4_IB1;
9268      nop = gen_nop ();
9269    }
9270  else if (TARGET_FP && !(in_use & EV4_IB1))
9271    {
9272      in_use |= EV4_IB1;
9273      nop = gen_fnop ();
9274    }
9275  else
9276    nop = gen_unop ();
9277
9278  *pin_use = in_use;
9279  return nop;
9280}
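
/* Likewise for EV5: an integer nop while E1 is free, then fnop for the
   FA or FM slots when FP registers are available, and unop otherwise.  */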
9281
9282static rtx
9283alphaev5_next_nop (int *pin_use)
9284{
9285  int in_use = *pin_use;
9286  rtx nop;
9287
9288  if (!(in_use & EV5_E1))
9289    {
9290      in_use |= EV5_E1;
9291      nop = gen_nop ();
9292    }
9293  else if (TARGET_FP && !(in_use & EV5_FA))
9294    {
9295      in_use |= EV5_FA;
9296      nop = gen_fnop ();
9297    }
9298  else if (TARGET_FP && !(in_use & EV5_FM))
9299    {
9300      in_use |= EV5_FM;
9301      nop = gen_fnop ();
9302    }
9303  else
9304    nop = gen_unop ();
9305
9306  *pin_use = in_use;
9307  return nop;
9308}
9309
9310/* The instruction group alignment main loop.  */
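
/* A sketch of the bookkeeping done below (an illustration, not taken
   from the hardware manuals): OFS tracks the byte offset of the current
   group within the current ALIGN-byte block.  For example, with ALIGN
   == 16, OFS == 12 and a new 8-byte group, the group would straddle a
   block boundary; (ALIGN - OFS) / 4 == 1 nop is emitted so that the
   group starts at the next block, and the nop is chosen by NEXT_NOP so
   that it issues for free with the previous group.  */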
9311
9312static void
9313alpha_align_insns_1 (unsigned int max_align,
9314		     rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9315		     rtx (*next_nop) (int *))
9316{
9317  /* ALIGN is the known alignment for the insn group.  */
9318  unsigned int align;
9319  /* OFS is the offset of the current insn in the insn group.  */
9320  int ofs;
9321  int prev_in_use, in_use, len, ldgp;
9322  rtx_insn *i, *next;
9323
9324  /* Let shorten branches care for assigning alignments to code labels.  */
9325  shorten_branches (get_insns ());
9326
9327  if (align_functions < 4)
9328    align = 4;
9329  else if ((unsigned int) align_functions < max_align)
9330    align = align_functions;
9331  else
9332    align = max_align;
9333
9334  ofs = prev_in_use = 0;
9335  i = get_insns ();
9336  if (NOTE_P (i))
9337    i = next_nonnote_insn (i);
9338
9339  ldgp = alpha_function_needs_gp ? 8 : 0;
9340
9341  while (i)
9342    {
9343      next = (*next_group) (i, &in_use, &len);
9344
9345      /* When we see a label, resync alignment etc.  */
9346      if (LABEL_P (i))
9347	{
9348	  unsigned int new_align = 1 << label_to_alignment (i);
9349
9350	  if (new_align >= align)
9351	    {
9352	      align = new_align < max_align ? new_align : max_align;
9353	      ofs = 0;
9354	    }
9355
9356	  else if (ofs & (new_align-1))
9357	    ofs = (ofs | (new_align-1)) + 1;
9358	  gcc_assert (!len);
9359	}
9360
      /* Handle complex instructions specially.  */
9362      else if (in_use == 0)
9363	{
9364	  /* Asms will have length < 0.  This is a signal that we have
9365	     lost alignment knowledge.  Assume, however, that the asm
9366	     will not mis-align instructions.  */
9367	  if (len < 0)
9368	    {
9369	      ofs = 0;
9370	      align = 4;
9371	      len = 0;
9372	    }
9373	}
9374
9375      /* If the known alignment is smaller than the recognized insn group,
9376	 realign the output.  */
9377      else if ((int) align < len)
9378	{
9379	  unsigned int new_log_align = len > 8 ? 4 : 3;
9380	  rtx_insn *prev, *where;
9381
9382	  where = prev = prev_nonnote_insn (i);
9383	  if (!where || !LABEL_P (where))
9384	    where = i;
9385
9386	  /* Can't realign between a call and its gp reload.  */
9387	  if (! (TARGET_EXPLICIT_RELOCS
9388		 && prev && CALL_P (prev)))
9389	    {
9390	      emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9391	      align = 1 << new_log_align;
9392	      ofs = 0;
9393	    }
9394	}
9395
9396      /* We may not insert padding inside the initial ldgp sequence.  */
9397      else if (ldgp > 0)
9398	ldgp -= len;
9399
      /* If the group won't fit in the same ALIGN-byte block as the
	 previous group, we need to add padding to keep the group
	 together.  Rather than simply leaving the insn filling to the
	 assembler, we can make use of the knowledge of what sorts of
	 instructions were issued in the previous group to make sure that
	 all of the added nops are really free.  */
9406      else if (ofs + len > (int) align)
9407	{
9408	  int nop_count = (align - ofs) / 4;
9409	  rtx_insn *where;
9410
9411	  /* Insert nops before labels, branches, and calls to truly merge
9412	     the execution of the nops with the previous instruction group.  */
9413	  where = prev_nonnote_insn (i);
9414	  if (where)
9415	    {
9416	      if (LABEL_P (where))
9417		{
9418		  rtx_insn *where2 = prev_nonnote_insn (where);
9419		  if (where2 && JUMP_P (where2))
9420		    where = where2;
9421		}
9422	      else if (NONJUMP_INSN_P (where))
9423		where = i;
9424	    }
9425	  else
9426	    where = i;
9427
9428	  do
9429	    emit_insn_before ((*next_nop)(&prev_in_use), where);
9430	  while (--nop_count);
9431	  ofs = 0;
9432	}
9433
9434      ofs = (ofs + len) & (align - 1);
9435      prev_in_use = in_use;
9436      i = next;
9437    }
9438}
9439
9440static void
9441alpha_align_insns (void)
9442{
9443  if (alpha_tune == PROCESSOR_EV4)
9444    alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9445  else if (alpha_tune == PROCESSOR_EV5)
9446    alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9447  else
9448    gcc_unreachable ();
9449}
9450
/* Insert an unop between a sibcall or noreturn function call and the
   GP load.  */
9452
9453static void
9454alpha_pad_function_end (void)
9455{
9456  rtx_insn *insn, *next;
9457
9458  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9459    {
9460      if (!CALL_P (insn)
9461	  || !(SIBLING_CALL_P (insn)
9462	       || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9463        continue;
9464
9465      /* Make sure we do not split a call and its corresponding
9466	 CALL_ARG_LOCATION note.  */
9467      next = NEXT_INSN (insn);
9468      if (next == NULL)
9469	continue;
9470      if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9471	insn = next;
9472
9473      next = next_active_insn (insn);
9474      if (next)
9475	{
9476	  rtx pat = PATTERN (next);
9477
9478	  if (GET_CODE (pat) == SET
9479	      && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9480	      && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9481	    emit_insn_after (gen_unop (), insn);
9482	}
9483    }
9484}
9485
9486/* Machine dependent reorg pass.  */
9487
9488static void
9489alpha_reorg (void)
9490{
  /* Workaround for a linker error that triggers when an exception
     handler immediately follows a sibcall or a noreturn function.

     In the sibcall case:
9495
9496     The instruction stream from an object file:
9497
9498 1d8:   00 00 fb 6b     jmp     (t12)
9499 1dc:   00 00 ba 27     ldah    gp,0(ra)
9500 1e0:   00 00 bd 23     lda     gp,0(gp)
9501 1e4:   00 00 7d a7     ldq     t12,0(gp)
9502 1e8:   00 40 5b 6b     jsr     ra,(t12),1ec <__funcZ+0x1ec>
9503
9504     was converted in the final link pass to:
9505
9506   12003aa88:   67 fa ff c3     br      120039428 <...>
9507   12003aa8c:   00 00 fe 2f     unop
9508   12003aa90:   00 00 fe 2f     unop
9509   12003aa94:   48 83 7d a7     ldq     t12,-31928(gp)
9510   12003aa98:   00 40 5b 6b     jsr     ra,(t12),12003aa9c <__func+0x1ec>
9511
     And in the noreturn case:
9513
9514     The instruction stream from an object file:
9515
9516  54:   00 40 5b 6b     jsr     ra,(t12),58 <__func+0x58>
9517  58:   00 00 ba 27     ldah    gp,0(ra)
9518  5c:   00 00 bd 23     lda     gp,0(gp)
9519  60:   00 00 7d a7     ldq     t12,0(gp)
9520  64:   00 40 5b 6b     jsr     ra,(t12),68 <__func+0x68>
9521
9522     was converted in the final link pass to:
9523
9524   fdb24:       a0 03 40 d3     bsr     ra,fe9a8 <_called_func+0x8>
9525   fdb28:       00 00 fe 2f     unop
9526   fdb2c:       00 00 fe 2f     unop
9527   fdb30:       30 82 7d a7     ldq     t12,-32208(gp)
9528   fdb34:       00 40 5b 6b     jsr     ra,(t12),fdb38 <__func+0x68>
9529
9530     GP load instructions were wrongly cleared by the linker relaxation
9531     pass.  This workaround prevents removal of GP loads by inserting
9532     an unop instruction between a sibcall or noreturn function call and
9533     exception handler prologue.  */
9534
9535  if (current_function_has_exception_handlers ())
9536    alpha_pad_function_end ();
9537}
9538
9539static void
9540alpha_file_start (void)
9541{
9542  default_file_start ();
9543
9544  fputs ("\t.set noreorder\n", asm_out_file);
9545  fputs ("\t.set volatile\n", asm_out_file);
9546  if (TARGET_ABI_OSF)
9547    fputs ("\t.set noat\n", asm_out_file);
9548  if (TARGET_EXPLICIT_RELOCS)
9549    fputs ("\t.set nomacro\n", asm_out_file);
9550  if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9551    {
9552      const char *arch;
9553
9554      if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9555	arch = "ev6";
9556      else if (TARGET_MAX)
9557	arch = "pca56";
9558      else if (TARGET_BWX)
9559	arch = "ev56";
9560      else if (alpha_cpu == PROCESSOR_EV5)
9561	arch = "ev5";
9562      else
9563	arch = "ev4";
9564
9565      fprintf (asm_out_file, "\t.arch %s\n", arch);
9566    }
9567}
9568
9569/* Since we don't have a .dynbss section, we should not allow global
9570   relocations in the .rodata section.  */
9571
9572static int
9573alpha_elf_reloc_rw_mask (void)
9574{
9575  return flag_pic ? 3 : 2;
9576}
9577
9578/* Return a section for X.  The only special thing we do here is to
9579   honor small data.  */
9580
9581static section *
9582alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9583			      unsigned HOST_WIDE_INT align)
9584{
9585  if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9586    /* ??? Consider using mergeable sdata sections.  */
9587    return sdata_section;
9588  else
9589    return default_elf_select_rtx_section (mode, x, align);
9590}
9591
9592static unsigned int
9593alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9594{
9595  unsigned int flags = 0;
9596
9597  if (strcmp (name, ".sdata") == 0
9598      || strncmp (name, ".sdata.", 7) == 0
9599      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9600      || strcmp (name, ".sbss") == 0
9601      || strncmp (name, ".sbss.", 6) == 0
9602      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9603    flags = SECTION_SMALL;
9604
9605  flags |= default_section_type_flags (decl, name, reloc);
9606  return flags;
9607}
9608
9609/* Structure to collect function names for final output in link section.  */
9610/* Note that items marked with GTY can't be ifdef'ed out.  */
9611
9612enum reloc_kind
9613{
9614  KIND_LINKAGE,
9615  KIND_CODEADDR
9616};
9617
9618struct GTY(()) alpha_links
9619{
9620  rtx func;
9621  rtx linkage;
9622  enum reloc_kind rkind;
9623};
9624
9625#if TARGET_ABI_OPEN_VMS
9626
9627/* Return the VMS argument type corresponding to MODE.  */
9628
9629enum avms_arg_type
9630alpha_arg_type (machine_mode mode)
9631{
9632  switch (mode)
9633    {
9634    case SFmode:
9635      return TARGET_FLOAT_VAX ? FF : FS;
9636    case DFmode:
9637      return TARGET_FLOAT_VAX ? FD : FT;
9638    default:
9639      return I64;
9640    }
9641}
9642
9643/* Return an rtx for an integer representing the VMS Argument Information
9644   register value.  */
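
/* The resulting value packs the argument count into the low 8 bits and
   a 3-bit avms_arg_type code for each of the first six arguments
   starting at bit 8, i.e. bits 8-10 describe argument 1, bits 11-13
   argument 2, and so on.  */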
9645
9646rtx
9647alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9648{
9649  unsigned HOST_WIDE_INT regval = cum.num_args;
9650  int i;
9651
9652  for (i = 0; i < 6; i++)
9653    regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9654
9655  return GEN_INT (regval);
9656}
9657
9658
9659/* Return a SYMBOL_REF representing the reference to the .linkage entry
9660   of function FUNC built for calls made from CFUNDECL.  LFLAG is 1 if
9661   this is the reference to the linkage pointer value, 0 if this is the
   reference to the function entry value.  RFLAG is 1 if this is a reduced
9663   reference (code address only), 0 if this is a full reference.  */
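
/* For illustration: for a function "foo" referenced from the function
   whose funcdef_no is 5, the linkage symbol created below is
   "$5..foo..lk".  With LFLAG set, the reference returned is a memory
   reference 8 bytes into that linkage pair (the linkage pointer value);
   otherwise the linkage symbol itself (the function entry value) is
   returned.  */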
9664
9665rtx
9666alpha_use_linkage (rtx func, bool lflag, bool rflag)
9667{
9668  struct alpha_links *al = NULL;
9669  const char *name = XSTR (func, 0);
9670
9671  if (cfun->machine->links)
9672    {
9673      /* Is this name already defined?  */
9674      alpha_links **slot = cfun->machine->links->get (name);
9675      if (slot)
9676	al = *slot;
9677    }
9678  else
9679    cfun->machine->links
9680      = hash_map<const char *, alpha_links *, string_traits>::create_ggc (64);
9681
9682  if (al == NULL)
9683    {
9684      size_t buf_len;
9685      char *linksym;
9686      tree id;
9687
9688      if (name[0] == '*')
9689	name++;
9690
9691      /* Follow transparent alias, as this is used for CRTL translations.  */
9692      id = maybe_get_identifier (name);
9693      if (id)
9694        {
9695          while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9696            id = TREE_CHAIN (id);
9697          name = IDENTIFIER_POINTER (id);
9698        }
9699
9700      buf_len = strlen (name) + 8 + 9;
9701      linksym = (char *) alloca (buf_len);
9702      snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9703
9704      al = ggc_alloc<alpha_links> ();
9705      al->func = func;
9706      al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9707
9708      cfun->machine->links->put (ggc_strdup (name), al);
9709    }
9710
9711  al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9712
9713  if (lflag)
9714    return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9715  else
9716    return al->linkage;
9717}
9718
9719static int
9720alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9721{
9722  ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9723  if (link->rkind == KIND_CODEADDR)
9724    {
9725      /* External and used, request code address.  */
9726      fprintf (stream, "\t.code_address ");
9727    }
9728  else
9729    {
9730      if (!SYMBOL_REF_EXTERNAL_P (link->func)
9731          && SYMBOL_REF_LOCAL_P (link->func))
9732	{
9733	  /* Locally defined, build linkage pair.  */
9734	  fprintf (stream, "\t.quad %s..en\n", name);
9735	  fprintf (stream, "\t.quad ");
9736	}
9737      else
9738	{
9739	  /* External, request linkage pair.  */
9740	  fprintf (stream, "\t.linkage ");
9741	}
9742    }
9743  assemble_name (stream, name);
9744  fputs ("\n", stream);
9745
9746  return 0;
9747}
9748
9749static void
9750alpha_write_linkage (FILE *stream, const char *funname)
9751{
9752  fprintf (stream, "\t.link\n");
9753  fprintf (stream, "\t.align 3\n");
9754  in_section = NULL;
9755
9756#ifdef TARGET_VMS_CRASH_DEBUG
9757  fputs ("\t.name ", stream);
9758  assemble_name (stream, funname);
9759  fputs ("..na\n", stream);
9760#endif
9761
9762  ASM_OUTPUT_LABEL (stream, funname);
9763  fprintf (stream, "\t.pdesc ");
9764  assemble_name (stream, funname);
9765  fprintf (stream, "..en,%s\n",
9766	   alpha_procedure_type == PT_STACK ? "stack"
9767	   : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9768
9769  if (cfun->machine->links)
9770    {
9771      hash_map<const char *, alpha_links *, string_traits>::iterator iter
9772	= cfun->machine->links->begin ();
9773      for (; iter != cfun->machine->links->end (); ++iter)
9774	alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9775    }
9776}
9777
9778/* Switch to an arbitrary section NAME with attributes as specified
9779   by FLAGS.  ALIGN specifies any known alignment requirements for
9780   the section; 0 if the default should be used.  */
9781
9782static void
9783vms_asm_named_section (const char *name, unsigned int flags,
9784		       tree decl ATTRIBUTE_UNUSED)
9785{
9786  fputc ('\n', asm_out_file);
9787  fprintf (asm_out_file, ".section\t%s", name);
9788
9789  if (flags & SECTION_DEBUG)
9790    fprintf (asm_out_file, ",NOWRT");
9791
9792  fputc ('\n', asm_out_file);
9793}
9794
9795/* Record an element in the table of global constructors.  SYMBOL is
9796   a SYMBOL_REF of the function to be called; PRIORITY is a number
9797   between 0 and MAX_INIT_PRIORITY.
9798
9799   Differs from default_ctors_section_asm_out_constructor in that the
9800   width of the .ctors entry is always 64 bits, rather than the 32 bits
9801   used by a normal pointer.  */
9802
9803static void
9804vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9805{
9806  switch_to_section (ctors_section);
9807  assemble_align (BITS_PER_WORD);
9808  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9809}
9810
9811static void
9812vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9813{
9814  switch_to_section (dtors_section);
9815  assemble_align (BITS_PER_WORD);
9816  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9817}
9818#else
9819rtx
9820alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9821		   bool lflag ATTRIBUTE_UNUSED,
9822		   bool rflag ATTRIBUTE_UNUSED)
9823{
9824  return NULL_RTX;
9825}
9826
9827#endif /* TARGET_ABI_OPEN_VMS */
9828
9829static void
9830alpha_init_libfuncs (void)
9831{
9832  if (TARGET_ABI_OPEN_VMS)
9833    {
9834      /* Use the VMS runtime library functions for division and
9835	 remainder.  */
9836      set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9837      set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9838      set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9839      set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9840      set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9841      set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9842      set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9843      set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9844      abort_libfunc = init_one_libfunc ("decc$abort");
9845      memcmp_libfunc = init_one_libfunc ("decc$memcmp");
9846#ifdef MEM_LIBFUNCS_INIT
9847      MEM_LIBFUNCS_INIT;
9848#endif
9849    }
9850}
9851
9852/* On the Alpha, we use this to disable the floating-point registers
9853   when they don't exist.  */
9854
9855static void
9856alpha_conditional_register_usage (void)
9857{
9858  int i;
9859  if (! TARGET_FPREGS)
9860    for (i = 32; i < 63; i++)
9861      fixed_regs[i] = call_used_regs[i] = 1;
9862}
9863
9864/* Canonicalize a comparison from one we don't have to one we do have.  */
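
/* For example, under the conditions checked below (reg1 > reg2) becomes
   (reg2 < reg1), and (x < 256) becomes (x <= 255), presumably because
   255 fits the 8-bit literal operand field.  */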
9865
9866static void
9867alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9868			       bool op0_preserve_value)
9869{
9870  if (!op0_preserve_value
9871      && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9872      && (REG_P (*op1) || *op1 == const0_rtx))
9873    {
9874      rtx tem = *op0;
9875      *op0 = *op1;
9876      *op1 = tem;
9877      *code = (int)swap_condition ((enum rtx_code)*code);
9878    }
9879
9880  if ((*code == LT || *code == LTU)
9881      && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9882    {
9883      *code = *code == LT ? LE : LEU;
9884      *op1 = GEN_INT (255);
9885    }
9886}
9887
9888/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
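
/* As a summary of how these sequences are used (a description of the
   hook's contract, not specific to Alpha): *HOLD, *CLEAR and *UPDATE
   play the roles of feholdexcept, feclearexcept and feupdateenv around
   a C11 atomic compound assignment on a floating-point object.  */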
9889
9890static void
9891alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9892{
9893  const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9894
9895  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9896  tree new_fenv_var, reload_fenv, restore_fnenv;
9897  tree update_call, atomic_feraiseexcept, hold_fnclex;
9898
9899  /* Assume OSF/1 compatible interfaces.  */
9900  if (!TARGET_ABI_OSF)
9901    return;
9902
  /* Generate the equivalent of:
9904       unsigned long fenv_var;
9905       fenv_var = __ieee_get_fp_control ();
9906
9907       unsigned long masked_fenv;
9908       masked_fenv = fenv_var & mask;
9909
9910       __ieee_set_fp_control (masked_fenv);  */
9911
9912  fenv_var = create_tmp_var (long_unsigned_type_node);
9913  get_fpscr
9914    = build_fn_decl ("__ieee_get_fp_control",
9915		     build_function_type_list (long_unsigned_type_node, NULL));
9916  set_fpscr
9917    = build_fn_decl ("__ieee_set_fp_control",
9918		     build_function_type_list (void_type_node, NULL));
9919  mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9920  ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
9921		    fenv_var, build_call_expr (get_fpscr, 0));
9922  masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9923  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9924  *hold = build2 (COMPOUND_EXPR, void_type_node,
9925		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9926		  hold_fnclex);
9927
9928  /* Store the value of masked_fenv to clear the exceptions:
9929     __ieee_set_fp_control (masked_fenv);  */
9930
9931  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9932
  /* Generate the equivalent of:
9934       unsigned long new_fenv_var;
9935       new_fenv_var = __ieee_get_fp_control ();
9936
9937       __ieee_set_fp_control (fenv_var);
9938
9939       __atomic_feraiseexcept (new_fenv_var);  */
9940
9941  new_fenv_var = create_tmp_var (long_unsigned_type_node);
9942  reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
9943			build_call_expr (get_fpscr, 0));
9944  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9945  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9946  update_call
9947    = build_call_expr (atomic_feraiseexcept, 1,
9948		       fold_convert (integer_type_node, new_fenv_var));
9949  *update = build2 (COMPOUND_EXPR, void_type_node,
9950		    build2 (COMPOUND_EXPR, void_type_node,
9951			    reload_fenv, restore_fnenv), update_call);
9952}
9953
9954/* Initialize the GCC target structure.  */
9955#if TARGET_ABI_OPEN_VMS
9956# undef TARGET_ATTRIBUTE_TABLE
9957# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9958# undef TARGET_CAN_ELIMINATE
9959# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9960#endif
9961
9962#undef TARGET_IN_SMALL_DATA_P
9963#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9964
9965#undef TARGET_ASM_ALIGNED_HI_OP
9966#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9967#undef TARGET_ASM_ALIGNED_DI_OP
9968#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9969
9970/* Default unaligned ops are provided for ELF systems.  To get unaligned
9971   data for non-ELF systems, we have to turn off auto alignment.  */
9972#if TARGET_ABI_OPEN_VMS
9973#undef TARGET_ASM_UNALIGNED_HI_OP
9974#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9975#undef TARGET_ASM_UNALIGNED_SI_OP
9976#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9977#undef TARGET_ASM_UNALIGNED_DI_OP
9978#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9979#endif
9980
9981#undef  TARGET_ASM_RELOC_RW_MASK
9982#define TARGET_ASM_RELOC_RW_MASK  alpha_elf_reloc_rw_mask
9983#undef	TARGET_ASM_SELECT_RTX_SECTION
9984#define	TARGET_ASM_SELECT_RTX_SECTION  alpha_elf_select_rtx_section
9985#undef  TARGET_SECTION_TYPE_FLAGS
9986#define TARGET_SECTION_TYPE_FLAGS  alpha_elf_section_type_flags
9987
9988#undef TARGET_ASM_FUNCTION_END_PROLOGUE
9989#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9990
9991#undef TARGET_INIT_LIBFUNCS
9992#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9993
9994#undef TARGET_LEGITIMIZE_ADDRESS
9995#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9996#undef TARGET_MODE_DEPENDENT_ADDRESS_P
9997#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9998
9999#undef TARGET_ASM_FILE_START
10000#define TARGET_ASM_FILE_START alpha_file_start
10001
10002#undef TARGET_SCHED_ADJUST_COST
10003#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
10004#undef TARGET_SCHED_ISSUE_RATE
10005#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
10006#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
10007#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
10008  alpha_multipass_dfa_lookahead
10009
10010#undef TARGET_HAVE_TLS
10011#define TARGET_HAVE_TLS HAVE_AS_TLS
10012
10013#undef  TARGET_BUILTIN_DECL
10014#define TARGET_BUILTIN_DECL  alpha_builtin_decl
10015#undef  TARGET_INIT_BUILTINS
10016#define TARGET_INIT_BUILTINS alpha_init_builtins
10017#undef  TARGET_EXPAND_BUILTIN
10018#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
10019#undef  TARGET_FOLD_BUILTIN
10020#define TARGET_FOLD_BUILTIN alpha_fold_builtin
10021#undef  TARGET_GIMPLE_FOLD_BUILTIN
10022#define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
10023
10024#undef TARGET_FUNCTION_OK_FOR_SIBCALL
10025#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
10026#undef TARGET_CANNOT_COPY_INSN_P
10027#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
10028#undef TARGET_LEGITIMATE_CONSTANT_P
10029#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
10030#undef TARGET_CANNOT_FORCE_CONST_MEM
10031#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
10032
10033#if TARGET_ABI_OSF
10034#undef TARGET_ASM_OUTPUT_MI_THUNK
10035#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10036#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10037#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10038#undef TARGET_STDARG_OPTIMIZE_HOOK
10039#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10040#endif
10041
/* Use 16-bit section anchors.  */
10043#undef TARGET_MIN_ANCHOR_OFFSET
10044#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10045#undef TARGET_MAX_ANCHOR_OFFSET
10046#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
10047#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10048#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10049
10050#undef TARGET_RTX_COSTS
10051#define TARGET_RTX_COSTS alpha_rtx_costs
10052#undef TARGET_ADDRESS_COST
10053#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10054
10055#undef TARGET_MACHINE_DEPENDENT_REORG
10056#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10057
10058#undef TARGET_PROMOTE_FUNCTION_MODE
10059#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10060#undef TARGET_PROMOTE_PROTOTYPES
10061#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10062#undef TARGET_RETURN_IN_MEMORY
10063#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10064#undef TARGET_PASS_BY_REFERENCE
10065#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10066#undef TARGET_SETUP_INCOMING_VARARGS
10067#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10068#undef TARGET_STRICT_ARGUMENT_NAMING
10069#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10070#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10071#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10072#undef TARGET_SPLIT_COMPLEX_ARG
10073#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10074#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10075#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10076#undef TARGET_ARG_PARTIAL_BYTES
10077#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10078#undef TARGET_FUNCTION_ARG
10079#define TARGET_FUNCTION_ARG alpha_function_arg
10080#undef TARGET_FUNCTION_ARG_ADVANCE
10081#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10082#undef TARGET_TRAMPOLINE_INIT
10083#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10084
10085#undef TARGET_INSTANTIATE_DECLS
10086#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10087
10088#undef TARGET_SECONDARY_RELOAD
10089#define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10090
10091#undef TARGET_SCALAR_MODE_SUPPORTED_P
10092#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10093#undef TARGET_VECTOR_MODE_SUPPORTED_P
10094#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10095
10096#undef TARGET_BUILD_BUILTIN_VA_LIST
10097#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10098
10099#undef TARGET_EXPAND_BUILTIN_VA_START
10100#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10101
10102/* The Alpha architecture does not require sequential consistency.  See
10103   http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
10104   for an example of how it can be violated in practice.  */
10105#undef TARGET_RELAXED_ORDERING
10106#define TARGET_RELAXED_ORDERING true
10107
10108#undef TARGET_OPTION_OVERRIDE
10109#define TARGET_OPTION_OVERRIDE alpha_option_override
10110
10111#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10112#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10113  alpha_override_options_after_change
10114
10115#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10116#undef TARGET_MANGLE_TYPE
10117#define TARGET_MANGLE_TYPE alpha_mangle_type
10118#endif
10119
10120#undef TARGET_LEGITIMATE_ADDRESS_P
10121#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10122
10123#undef TARGET_CONDITIONAL_REGISTER_USAGE
10124#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10125
10126#undef TARGET_CANONICALIZE_COMPARISON
10127#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10128
10129#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10130#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10131
10132struct gcc_target targetm = TARGET_INITIALIZER;
10133
10134
10135#include "gt-alpha.h"
10136