1/* Subroutines used for code generation on the DEC Alpha.
2   Copyright (C) 1992-2017 Free Software Foundation, Inc.
3   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "memmodel.h"
30#include "gimple.h"
31#include "df.h"
32#include "tm_p.h"
33#include "ssa.h"
34#include "expmed.h"
35#include "optabs.h"
36#include "regs.h"
37#include "emit-rtl.h"
38#include "recog.h"
39#include "diagnostic-core.h"
40#include "alias.h"
41#include "fold-const.h"
42#include "stor-layout.h"
43#include "calls.h"
44#include "varasm.h"
45#include "output.h"
46#include "insn-attr.h"
47#include "explow.h"
48#include "expr.h"
49#include "reload.h"
50#include "except.h"
51#include "common/common-target.h"
52#include "debug.h"
53#include "langhooks.h"
54#include "cfgrtl.h"
55#include "tree-pass.h"
56#include "context.h"
57#include "gimple-iterator.h"
58#include "gimplify.h"
59#include "tree-stdarg.h"
60#include "tm-constrs.h"
61#include "libfuncs.h"
62#include "params.h"
63#include "builtins.h"
64#include "rtl-iter.h"
65
66/* This file should be included last.  */
67#include "target-def.h"
68
69/* Specify which cpu to schedule for.  */
70enum processor_type alpha_tune;
71
72/* Which cpu we're generating code for.  */
73enum processor_type alpha_cpu;
74
75static const char * const alpha_cpu_name[] =
76{
77  "ev4", "ev5", "ev6"
78};
79
80/* Specify how accurate floating-point traps need to be.  */
81
82enum alpha_trap_precision alpha_tp;
83
84/* Specify the floating-point rounding mode.  */
85
86enum alpha_fp_rounding_mode alpha_fprm;
87
88/* Specify which things cause traps.  */
89
90enum alpha_fp_trap_mode alpha_fptm;
91
/* Nonzero if we are inside a function, because the Alpha assembler can't
   handle .file directives inside of functions.  */
94
95static int inside_function = FALSE;
96
97/* The number of cycles of latency we should assume on memory reads.  */
98
99static int alpha_memory_latency = 3;
100
101/* Whether the function needs the GP.  */
102
103static int alpha_function_needs_gp;
104
105/* The assembler name of the current function.  */
106
107static const char *alpha_fnname;
108
109/* The next explicit relocation sequence number.  */
110extern GTY(()) int alpha_next_sequence_number;
111int alpha_next_sequence_number = 1;
112
113/* The literal and gpdisp sequence numbers for this insn, as printed
114   by %# and %* respectively.  */
115extern GTY(()) int alpha_this_literal_sequence_number;
116extern GTY(()) int alpha_this_gpdisp_sequence_number;
117int alpha_this_literal_sequence_number;
118int alpha_this_gpdisp_sequence_number;
119
120/* Costs of various operations on the different architectures.  */
121
122struct alpha_rtx_cost_data
123{
124  unsigned char fp_add;
125  unsigned char fp_mult;
126  unsigned char fp_div_sf;
127  unsigned char fp_div_df;
128  unsigned char int_mult_si;
129  unsigned char int_mult_di;
130  unsigned char int_shift;
131  unsigned char int_cmov;
132  unsigned short int_div;
133};
134
135static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
136{
137  { /* EV4 */
138    COSTS_N_INSNS (6),		/* fp_add */
139    COSTS_N_INSNS (6),		/* fp_mult */
140    COSTS_N_INSNS (34),		/* fp_div_sf */
141    COSTS_N_INSNS (63),		/* fp_div_df */
142    COSTS_N_INSNS (23),		/* int_mult_si */
143    COSTS_N_INSNS (23),		/* int_mult_di */
144    COSTS_N_INSNS (2),		/* int_shift */
145    COSTS_N_INSNS (2),		/* int_cmov */
146    COSTS_N_INSNS (97),		/* int_div */
147  },
148  { /* EV5 */
149    COSTS_N_INSNS (4),		/* fp_add */
150    COSTS_N_INSNS (4),		/* fp_mult */
151    COSTS_N_INSNS (15),		/* fp_div_sf */
152    COSTS_N_INSNS (22),		/* fp_div_df */
153    COSTS_N_INSNS (8),		/* int_mult_si */
154    COSTS_N_INSNS (12),		/* int_mult_di */
155    COSTS_N_INSNS (1) + 1,	/* int_shift */
156    COSTS_N_INSNS (1),		/* int_cmov */
157    COSTS_N_INSNS (83),		/* int_div */
158  },
159  { /* EV6 */
160    COSTS_N_INSNS (4),		/* fp_add */
161    COSTS_N_INSNS (4),		/* fp_mult */
162    COSTS_N_INSNS (12),		/* fp_div_sf */
163    COSTS_N_INSNS (15),		/* fp_div_df */
164    COSTS_N_INSNS (7),		/* int_mult_si */
165    COSTS_N_INSNS (7),		/* int_mult_di */
166    COSTS_N_INSNS (1),		/* int_shift */
167    COSTS_N_INSNS (2),		/* int_cmov */
168    COSTS_N_INSNS (86),		/* int_div */
169  },
170};
171
172/* Similar but tuned for code size instead of execution latency.  The
173   extra +N is fractional cost tuning based on latency.  It's used to
174   encourage use of cheaper insns like shift, but only if there's just
175   one of them.  */
176
177static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
178{
179  COSTS_N_INSNS (1),		/* fp_add */
180  COSTS_N_INSNS (1),		/* fp_mult */
181  COSTS_N_INSNS (1),		/* fp_div_sf */
182  COSTS_N_INSNS (1) + 1,	/* fp_div_df */
183  COSTS_N_INSNS (1) + 1,	/* int_mult_si */
184  COSTS_N_INSNS (1) + 2,	/* int_mult_di */
185  COSTS_N_INSNS (1),		/* int_shift */
186  COSTS_N_INSNS (1),		/* int_cmov */
187  COSTS_N_INSNS (6),		/* int_div */
188};
189
190/* Get the number of args of a function in one of two ways.  */
191#if TARGET_ABI_OPEN_VMS
192#define NUM_ARGS crtl->args.info.num_args
193#else
194#define NUM_ARGS crtl->args.info
195#endif
196
197#define REG_PV 27
198#define REG_RA 26
199
200/* Declarations of static functions.  */
201static struct machine_function *alpha_init_machine_status (void);
202static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
203static void alpha_handle_trap_shadows (void);
204static void alpha_align_insns (void);
205static void alpha_override_options_after_change (void);
206
207#if TARGET_ABI_OPEN_VMS
208static void alpha_write_linkage (FILE *, const char *);
209static bool vms_valid_pointer_mode (machine_mode);
210#else
211#define vms_patch_builtins()  gcc_unreachable()
212#endif
213
214static unsigned int
215rest_of_handle_trap_shadows (void)
216{
217  alpha_handle_trap_shadows ();
218  return 0;
219}
220
221namespace {
222
223const pass_data pass_data_handle_trap_shadows =
224{
225  RTL_PASS,
226  "trap_shadows",			/* name */
227  OPTGROUP_NONE,			/* optinfo_flags */
228  TV_NONE,				/* tv_id */
229  0,					/* properties_required */
230  0,					/* properties_provided */
231  0,					/* properties_destroyed */
232  0,					/* todo_flags_start */
233  TODO_df_finish,			/* todo_flags_finish */
234};
235
236class pass_handle_trap_shadows : public rtl_opt_pass
237{
238public:
239  pass_handle_trap_shadows(gcc::context *ctxt)
240    : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
241  {}
242
243  /* opt_pass methods: */
244  virtual bool gate (function *)
245    {
246      return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
247    }
248
249  virtual unsigned int execute (function *)
250    {
251      return rest_of_handle_trap_shadows ();
252    }
253
254}; // class pass_handle_trap_shadows
255
256} // anon namespace
257
258rtl_opt_pass *
259make_pass_handle_trap_shadows (gcc::context *ctxt)
260{
261  return new pass_handle_trap_shadows (ctxt);
262}
263
264static unsigned int
265rest_of_align_insns (void)
266{
267  alpha_align_insns ();
268  return 0;
269}
270
271namespace {
272
273const pass_data pass_data_align_insns =
274{
275  RTL_PASS,
276  "align_insns",			/* name */
277  OPTGROUP_NONE,			/* optinfo_flags */
278  TV_NONE,				/* tv_id */
279  0,					/* properties_required */
280  0,					/* properties_provided */
281  0,					/* properties_destroyed */
282  0,					/* todo_flags_start */
283  TODO_df_finish,			/* todo_flags_finish */
284};
285
286class pass_align_insns : public rtl_opt_pass
287{
288public:
289  pass_align_insns(gcc::context *ctxt)
290    : rtl_opt_pass(pass_data_align_insns, ctxt)
291  {}
292
293  /* opt_pass methods: */
294  virtual bool gate (function *)
295    {
296      /* Due to the number of extra trapb insns, don't bother fixing up
297	 alignment when trap precision is instruction.  Moreover, we can
298	 only do our job when sched2 is run.  */
299      return ((alpha_tune == PROCESSOR_EV4
300	       || alpha_tune == PROCESSOR_EV5)
301	      && optimize && !optimize_size
302	      && alpha_tp != ALPHA_TP_INSN
303	      && flag_schedule_insns_after_reload);
304    }
305
306  virtual unsigned int execute (function *)
307    {
308      return rest_of_align_insns ();
309    }
310
311}; // class pass_align_insns
312
313} // anon namespace
314
315rtl_opt_pass *
316make_pass_align_insns (gcc::context *ctxt)
317{
318  return new pass_align_insns (ctxt);
319}
320
321#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
322/* Implement TARGET_MANGLE_TYPE.  */
323
324static const char *
325alpha_mangle_type (const_tree type)
326{
327  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
328      && TARGET_LONG_DOUBLE_128)
329    return "g";
330
331  /* For all other types, use normal C++ mangling.  */
332  return NULL;
333}
334#endif
335
336/* Parse target option strings.  */
337
338static void
339alpha_option_override (void)
340{
341  static const struct cpu_table {
342    const char *const name;
343    const enum processor_type processor;
344    const int flags;
345    const unsigned short line_size; /* in bytes */
346    const unsigned short l1_size;   /* in kb.  */
347    const unsigned short l2_size;   /* in kb.  */
348  } cpu_table[] = {
349    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
350       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
351       had 64k to 8M 8-byte direct Bcache.  */
352    { "ev4",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
353    { "21064",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
354    { "ev45",	PROCESSOR_EV4, 0, 32, 16, 16*1024 },
355
356    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
357       and 1M to 16M 64 byte L3 (not modeled).
358       PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
359       PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache.  */
360    { "ev5",	PROCESSOR_EV5, 0, 32, 8, 96 },
361    { "21164",	PROCESSOR_EV5, 0, 32, 8, 96 },
362    { "ev56",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
363    { "21164a",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
364    { "pca56",	PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
365    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
366    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
367
368    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
369    { "ev6",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
370    { "21264",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
371    { "ev67",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
372      64, 64, 16*1024 },
373    { "21264a",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
374      64, 64, 16*1024 }
375  };
376
377  int const ct_size = ARRAY_SIZE (cpu_table);
378  int line_size = 0, l1_size = 0, l2_size = 0;
379  int i;
380
381#ifdef SUBTARGET_OVERRIDE_OPTIONS
382  SUBTARGET_OVERRIDE_OPTIONS;
383#endif
384
385  /* Default to full IEEE compliance mode for Go language.  */
386  if (strcmp (lang_hooks.name, "GNU Go") == 0
387      && !(target_flags_explicit & MASK_IEEE))
388    target_flags |= MASK_IEEE;
389
390  alpha_fprm = ALPHA_FPRM_NORM;
391  alpha_tp = ALPHA_TP_PROG;
392  alpha_fptm = ALPHA_FPTM_N;
393
394  if (TARGET_IEEE)
395    {
396      alpha_tp = ALPHA_TP_INSN;
397      alpha_fptm = ALPHA_FPTM_SU;
398    }
399  if (TARGET_IEEE_WITH_INEXACT)
400    {
401      alpha_tp = ALPHA_TP_INSN;
402      alpha_fptm = ALPHA_FPTM_SUI;
403    }
404
405  if (alpha_tp_string)
406    {
407      if (! strcmp (alpha_tp_string, "p"))
408	alpha_tp = ALPHA_TP_PROG;
409      else if (! strcmp (alpha_tp_string, "f"))
410	alpha_tp = ALPHA_TP_FUNC;
411      else if (! strcmp (alpha_tp_string, "i"))
412	alpha_tp = ALPHA_TP_INSN;
413      else
414	error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
415    }
416
417  if (alpha_fprm_string)
418    {
419      if (! strcmp (alpha_fprm_string, "n"))
420	alpha_fprm = ALPHA_FPRM_NORM;
421      else if (! strcmp (alpha_fprm_string, "m"))
422	alpha_fprm = ALPHA_FPRM_MINF;
423      else if (! strcmp (alpha_fprm_string, "c"))
424	alpha_fprm = ALPHA_FPRM_CHOP;
      else if (! strcmp (alpha_fprm_string, "d"))
426	alpha_fprm = ALPHA_FPRM_DYN;
427      else
428	error ("bad value %qs for -mfp-rounding-mode switch",
429	       alpha_fprm_string);
430    }
431
432  if (alpha_fptm_string)
433    {
434      if (strcmp (alpha_fptm_string, "n") == 0)
435	alpha_fptm = ALPHA_FPTM_N;
436      else if (strcmp (alpha_fptm_string, "u") == 0)
437	alpha_fptm = ALPHA_FPTM_U;
438      else if (strcmp (alpha_fptm_string, "su") == 0)
439	alpha_fptm = ALPHA_FPTM_SU;
440      else if (strcmp (alpha_fptm_string, "sui") == 0)
441	alpha_fptm = ALPHA_FPTM_SUI;
442      else
443	error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
444    }
445
446  if (alpha_cpu_string)
447    {
448      for (i = 0; i < ct_size; i++)
449	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
450	  {
451	    alpha_tune = alpha_cpu = cpu_table[i].processor;
452	    line_size = cpu_table[i].line_size;
453	    l1_size = cpu_table[i].l1_size;
454	    l2_size = cpu_table[i].l2_size;
455	    target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
456	    target_flags |= cpu_table[i].flags;
457	    break;
458	  }
459      if (i == ct_size)
460	error ("bad value %qs for -mcpu switch", alpha_cpu_string);
461    }
462
463  if (alpha_tune_string)
464    {
465      for (i = 0; i < ct_size; i++)
466	if (! strcmp (alpha_tune_string, cpu_table [i].name))
467	  {
468	    alpha_tune = cpu_table[i].processor;
469	    line_size = cpu_table[i].line_size;
470	    l1_size = cpu_table[i].l1_size;
471	    l2_size = cpu_table[i].l2_size;
472	    break;
473	  }
474      if (i == ct_size)
475	error ("bad value %qs for -mtune switch", alpha_tune_string);
476    }
477
478  if (line_size)
479    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
480			   global_options.x_param_values,
481			   global_options_set.x_param_values);
482  if (l1_size)
483    maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
484			   global_options.x_param_values,
485			   global_options_set.x_param_values);
486  if (l2_size)
487    maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
488			   global_options.x_param_values,
489			   global_options_set.x_param_values);
490
491  /* Do some sanity checks on the above options.  */
492
493  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
494      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
495    {
496      warning (0, "fp software completion requires -mtrap-precision=i");
497      alpha_tp = ALPHA_TP_INSN;
498    }
499
500  if (alpha_cpu == PROCESSOR_EV6)
501    {
      /* Except for EV6 pass 1 (not released), we always have precise
	 arithmetic traps, which means we can do software completion
	 without minding trap shadows.  */
505      alpha_tp = ALPHA_TP_PROG;
506    }
507
508  if (TARGET_FLOAT_VAX)
509    {
510      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
511	{
512	  warning (0, "rounding mode not supported for VAX floats");
513	  alpha_fprm = ALPHA_FPRM_NORM;
514	}
515      if (alpha_fptm == ALPHA_FPTM_SUI)
516	{
517	  warning (0, "trap mode not supported for VAX floats");
518	  alpha_fptm = ALPHA_FPTM_SU;
519	}
520      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
521	warning (0, "128-bit long double not supported for VAX floats");
522      target_flags &= ~MASK_LONG_DOUBLE_128;
523    }
524
525  {
526    char *end;
527    int lat;
528
529    if (!alpha_mlat_string)
530      alpha_mlat_string = "L1";
531
532    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
533	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
534      ;
535    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
536	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
537	     && alpha_mlat_string[2] == '\0')
538      {
539	static int const cache_latency[][4] =
540	{
541	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
542	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
543	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
544	};
545
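	/* For example, -mmemory-latency=L2 with -mtune=ev5 picks the 12-cycle
	   entry above; levels not covered by the table fall back to a
	   latency of 3.  */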
546	lat = alpha_mlat_string[1] - '0';
547	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
548	  {
549	    warning (0, "L%d cache latency unknown for %s",
550		     lat, alpha_cpu_name[alpha_tune]);
551	    lat = 3;
552	  }
553	else
554	  lat = cache_latency[alpha_tune][lat-1];
555      }
556    else if (! strcmp (alpha_mlat_string, "main"))
557      {
558	/* Most current memories have about 370ns latency.  This is
559	   a reasonable guess for a fast cpu.  */
560	lat = 150;
561      }
562    else
563      {
564	warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
565	lat = 3;
566      }
567
568    alpha_memory_latency = lat;
569  }
570
571  /* Default the definition of "small data" to 8 bytes.  */
572  if (!global_options_set.x_g_switch_value)
573    g_switch_value = 8;
574
575  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
576  if (flag_pic == 1)
577    target_flags |= MASK_SMALL_DATA;
578  else if (flag_pic == 2)
579    target_flags &= ~MASK_SMALL_DATA;
580
581  alpha_override_options_after_change ();
582
583  /* Register variables and functions with the garbage collector.  */
584
585  /* Set up function hooks.  */
586  init_machine_status = alpha_init_machine_status;
587
588  /* Tell the compiler when we're using VAX floating point.  */
589  if (TARGET_FLOAT_VAX)
590    {
591      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
592      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
593      REAL_MODE_FORMAT (TFmode) = NULL;
594    }
595
596#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
597  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
598    target_flags |= MASK_LONG_DOUBLE_128;
599#endif
600
601}
602
603/* Implement targetm.override_options_after_change.  */
604
605static void
606alpha_override_options_after_change (void)
607{
608  /* Align labels and loops for optimal branching.  */
609  /* ??? Kludge these by not doing anything if we don't optimize.  */
610  if (optimize > 0)
611    {
612      if (align_loops <= 0)
613	align_loops = 16;
614      if (align_jumps <= 0)
615	align_jumps = 16;
616    }
617  if (align_functions <= 0)
618    align_functions = 16;
619}
620
621/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */
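/* For example, 0x00ff00ffffff0000 is such a mask, while 0x0000000000000180
   is not, because its low two bytes are only partially set.  */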
622
623int
624zap_mask (HOST_WIDE_INT value)
625{
626  int i;
627
628  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
629       i++, value >>= 8)
630    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
631      return 0;
632
633  return 1;
634}
635
636/* Return true if OP is valid for a particular TLS relocation.
637   We are already guaranteed that OP is a CONST.  */
638
639int
640tls_symbolic_operand_1 (rtx op, int size, int unspec)
641{
642  op = XEXP (op, 0);
643
644  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
645    return 0;
646  op = XVECEXP (op, 0, 0);
647
648  if (GET_CODE (op) != SYMBOL_REF)
649    return 0;
650
651  switch (SYMBOL_REF_TLS_MODEL (op))
652    {
653    case TLS_MODEL_LOCAL_DYNAMIC:
654      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
655    case TLS_MODEL_INITIAL_EXEC:
656      return unspec == UNSPEC_TPREL && size == 64;
657    case TLS_MODEL_LOCAL_EXEC:
658      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
659    default:
660      gcc_unreachable ();
661    }
662}
663
664/* Used by aligned_memory_operand and unaligned_memory_operand to
665   resolve what reload is going to do with OP if it's a register.  */
666
667rtx
668resolve_reload_operand (rtx op)
669{
670  if (reload_in_progress)
671    {
672      rtx tmp = op;
673      if (SUBREG_P (tmp))
674	tmp = SUBREG_REG (tmp);
675      if (REG_P (tmp)
676	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
677	{
678	  op = reg_equiv_memory_loc (REGNO (tmp));
679	  if (op == 0)
680	    return 0;
681	}
682    }
683  return op;
684}
685
/* The set of scalar modes supported differs from the default
   check-what-c-supports version in that sometimes TFmode is available
   even when long double indicates only DFmode.  */
689
690static bool
691alpha_scalar_mode_supported_p (machine_mode mode)
692{
693  switch (mode)
694    {
695    case QImode:
696    case HImode:
697    case SImode:
698    case DImode:
699    case TImode: /* via optabs.c */
700      return true;
701
702    case SFmode:
703    case DFmode:
704      return true;
705
706    case TFmode:
707      return TARGET_HAS_XFLOATING_LIBS;
708
709    default:
710      return false;
711    }
712}
713
/* Alpha implements a couple of integer vector mode operations when
   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
   which allows the vectorizer to operate on e.g. move instructions,
   or when expand_vector_operations can do something useful.  */
718
719static bool
720alpha_vector_mode_supported_p (machine_mode mode)
721{
722  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
723}
724
725/* Return 1 if this function can directly return via $26.  */
726
727int
728direct_return (void)
729{
730  return (TARGET_ABI_OSF
731	  && reload_completed
732	  && alpha_sa_size () == 0
733	  && get_frame_size () == 0
734	  && crtl->outgoing_args_size == 0
735	  && crtl->args.pretend_args_size == 0);
736}
737
738/* Return the TLS model to use for SYMBOL.  */
739
740static enum tls_model
741tls_symbolic_operand_type (rtx symbol)
742{
743  enum tls_model model;
744
745  if (GET_CODE (symbol) != SYMBOL_REF)
746    return TLS_MODEL_NONE;
747  model = SYMBOL_REF_TLS_MODEL (symbol);
748
749  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
750  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
751    model = TLS_MODEL_INITIAL_EXEC;
752
753  return model;
754}
755
756/* Return true if the function DECL will share the same GP as any
757   function in the current unit of translation.  */
758
759static bool
760decl_has_samegp (const_tree decl)
761{
762  /* Functions that are not local can be overridden, and thus may
763     not share the same gp.  */
764  if (!(*targetm.binds_local_p) (decl))
765    return false;
766
767  /* If -msmall-data is in effect, assume that there is only one GP
768     for the module, and so any local symbol has this property.  We
769     need explicit relocations to be able to enforce this for symbols
770     not defined in this unit of translation, however.  */
771  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
772    return true;
773
774  /* Functions that are not external are defined in this UoT.  */
775  /* ??? Irritatingly, static functions not yet emitted are still
776     marked "external".  Apply this to non-static functions only.  */
777  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
778}
779
780/* Return true if EXP should be placed in the small data section.  */
781
782static bool
783alpha_in_small_data_p (const_tree exp)
784{
785  /* We want to merge strings, so we never consider them small data.  */
786  if (TREE_CODE (exp) == STRING_CST)
787    return false;
788
789  /* Functions are never in the small data area.  Duh.  */
790  if (TREE_CODE (exp) == FUNCTION_DECL)
791    return false;
792
793  /* COMMON symbols are never small data.  */
794  if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
795    return false;
796
797  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
798    {
799      const char *section = DECL_SECTION_NAME (exp);
800      if (strcmp (section, ".sdata") == 0
801	  || strcmp (section, ".sbss") == 0)
802	return true;
803    }
804  else
805    {
806      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
807
808      /* If this is an incomplete type with size 0, then we can't put it
809	 in sdata because it might be too big when completed.  */
810      if (size > 0 && size <= g_switch_value)
811	return true;
812    }
813
814  return false;
815}
816
817#if TARGET_ABI_OPEN_VMS
818static bool
819vms_valid_pointer_mode (machine_mode mode)
820{
821  return (mode == SImode || mode == DImode);
822}
823
824static bool
825alpha_linkage_symbol_p (const char *symname)
826{
827  int symlen = strlen (symname);
828
829  if (symlen > 4)
830    return strcmp (&symname [symlen - 4], "..lk") == 0;
831
832  return false;
833}
834
835#define LINKAGE_SYMBOL_REF_P(X) \
836  ((GET_CODE (X) == SYMBOL_REF   \
837    && alpha_linkage_symbol_p (XSTR (X, 0))) \
838   || (GET_CODE (X) == CONST                 \
839       && GET_CODE (XEXP (X, 0)) == PLUS     \
840       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
841       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
842#endif
843
844/* legitimate_address_p recognizes an RTL expression that is a valid
845   memory address for an instruction.  The MODE argument is the
846   machine mode for the MEM expression that wants to use this address.
847
848   For Alpha, we have either a constant address or the sum of a
849   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
851   low-order three bits; this is an "unaligned" access.  */
852
853static bool
854alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
855{
856  /* If this is an ldq_u type address, discard the outer AND.  */
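  /* E.g. (and (plus R C) (const_int -8)) names the aligned quadword that
     an ldq_u or stq_u with address R+C actually references.  */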
857  if (mode == DImode
858      && GET_CODE (x) == AND
859      && CONST_INT_P (XEXP (x, 1))
860      && INTVAL (XEXP (x, 1)) == -8)
861    x = XEXP (x, 0);
862
863  /* Discard non-paradoxical subregs.  */
864  if (SUBREG_P (x)
865      && (GET_MODE_SIZE (GET_MODE (x))
866	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
867    x = SUBREG_REG (x);
868
869  /* Unadorned general registers are valid.  */
870  if (REG_P (x)
871      && (strict
872	  ? STRICT_REG_OK_FOR_BASE_P (x)
873	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
874    return true;
875
876  /* Constant addresses (i.e. +/- 32k) are valid.  */
877  if (CONSTANT_ADDRESS_P (x))
878    return true;
879
880#if TARGET_ABI_OPEN_VMS
881  if (LINKAGE_SYMBOL_REF_P (x))
882    return true;
883#endif
884
885  /* Register plus a small constant offset is valid.  */
886  if (GET_CODE (x) == PLUS)
887    {
888      rtx ofs = XEXP (x, 1);
889      x = XEXP (x, 0);
890
891      /* Discard non-paradoxical subregs.  */
892      if (SUBREG_P (x)
893          && (GET_MODE_SIZE (GET_MODE (x))
894	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
895	x = SUBREG_REG (x);
896
897      if (REG_P (x))
898	{
899	  if (! strict
900	      && NONSTRICT_REG_OK_FP_BASE_P (x)
901	      && CONST_INT_P (ofs))
902	    return true;
903	  if ((strict
904	       ? STRICT_REG_OK_FOR_BASE_P (x)
905	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
906	      && CONSTANT_ADDRESS_P (ofs))
907	    return true;
908	}
909    }
910
911  /* If we're managing explicit relocations, LO_SUM is valid, as are small
912     data symbols.  Avoid explicit relocations of modes larger than word
     mode since e.g. $LC0+8($1) can fold around +/- 32k offset.  */
914  else if (TARGET_EXPLICIT_RELOCS
915	   && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
916    {
917      if (small_symbolic_operand (x, Pmode))
918	return true;
919
920      if (GET_CODE (x) == LO_SUM)
921	{
922	  rtx ofs = XEXP (x, 1);
923	  x = XEXP (x, 0);
924
925	  /* Discard non-paradoxical subregs.  */
926	  if (SUBREG_P (x)
927	      && (GET_MODE_SIZE (GET_MODE (x))
928		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
929	    x = SUBREG_REG (x);
930
931	  /* Must have a valid base register.  */
932	  if (! (REG_P (x)
933		 && (strict
934		     ? STRICT_REG_OK_FOR_BASE_P (x)
935		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
936	    return false;
937
938	  /* The symbol must be local.  */
939	  if (local_symbolic_operand (ofs, Pmode)
940	      || dtp32_symbolic_operand (ofs, Pmode)
941	      || tp32_symbolic_operand (ofs, Pmode))
942	    return true;
943	}
944    }
945
946  return false;
947}
948
949/* Build the SYMBOL_REF for __tls_get_addr.  */
950
951static GTY(()) rtx tls_get_addr_libfunc;
952
953static rtx
954get_tls_get_addr (void)
955{
956  if (!tls_get_addr_libfunc)
957    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
958  return tls_get_addr_libfunc;
959}
960
961/* Try machine-dependent ways of modifying an illegitimate address
962   to be legitimate.  If we find one, return the new, valid address.  */
963
964static rtx
965alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
966{
967  HOST_WIDE_INT addend;
968
969  /* If the address is (plus reg const_int) and the CONST_INT is not a
970     valid offset, compute the high part of the constant and add it to
971     the register.  Then our address is (plus temp low-part-const).  */
972  if (GET_CODE (x) == PLUS
973      && REG_P (XEXP (x, 0))
974      && CONST_INT_P (XEXP (x, 1))
975      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
976    {
977      addend = INTVAL (XEXP (x, 1));
978      x = XEXP (x, 0);
979      goto split_addend;
980    }
981
982  /* If the address is (const (plus FOO const_int)), find the low-order
983     part of the CONST_INT.  Then load FOO plus any high-order part of the
984     CONST_INT into a register.  Our address is (plus reg low-part-const).
985     This is done to reduce the number of GOT entries.  */
986  if (can_create_pseudo_p ()
987      && GET_CODE (x) == CONST
988      && GET_CODE (XEXP (x, 0)) == PLUS
989      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
990    {
991      addend = INTVAL (XEXP (XEXP (x, 0), 1));
992      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
993      goto split_addend;
994    }
995
996  /* If we have a (plus reg const), emit the load as in (2), then add
997     the two registers, and finally generate (plus reg low-part-const) as
998     our address.  */
999  if (can_create_pseudo_p ()
1000      && GET_CODE (x) == PLUS
1001      && REG_P (XEXP (x, 0))
1002      && GET_CODE (XEXP (x, 1)) == CONST
1003      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1004      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1005    {
1006      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1007      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1008			       XEXP (XEXP (XEXP (x, 1), 0), 0),
1009			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
1010      goto split_addend;
1011    }
1012
1013  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
     Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1015     around +/- 32k offset.  */
1016  if (TARGET_EXPLICIT_RELOCS
1017      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1018      && symbolic_operand (x, Pmode))
1019    {
1020      rtx r0, r16, eqv, tga, tp, dest, seq;
1021      rtx_insn *insn;
1022
1023      switch (tls_symbolic_operand_type (x))
1024	{
1025	case TLS_MODEL_NONE:
1026	  break;
1027
1028	case TLS_MODEL_GLOBAL_DYNAMIC:
1029	  {
1030	    start_sequence ();
1031
1032	    r0 = gen_rtx_REG (Pmode, 0);
1033	    r16 = gen_rtx_REG (Pmode, 16);
1034	    tga = get_tls_get_addr ();
1035	    dest = gen_reg_rtx (Pmode);
1036	    seq = GEN_INT (alpha_next_sequence_number++);
1037
1038	    emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1039	    rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
1040	    insn = emit_call_insn (val);
1041	    RTL_CONST_CALL_P (insn) = 1;
1042	    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1043
1044	    insn = get_insns ();
1045	    end_sequence ();
1046
1047	    emit_libcall_block (insn, dest, r0, x);
1048	    return dest;
1049	  }
1050
1051	case TLS_MODEL_LOCAL_DYNAMIC:
1052	  {
1053	    start_sequence ();
1054
1055	    r0 = gen_rtx_REG (Pmode, 0);
1056	    r16 = gen_rtx_REG (Pmode, 16);
1057	    tga = get_tls_get_addr ();
1058	    scratch = gen_reg_rtx (Pmode);
1059	    seq = GEN_INT (alpha_next_sequence_number++);
1060
1061	    emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1062	    rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
1063	    insn = emit_call_insn (val);
1064	    RTL_CONST_CALL_P (insn) = 1;
1065	    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1066
1067	    insn = get_insns ();
1068	    end_sequence ();
1069
1070	    eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1071				  UNSPEC_TLSLDM_CALL);
1072	    emit_libcall_block (insn, scratch, r0, eqv);
1073
1074	    eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1075	    eqv = gen_rtx_CONST (Pmode, eqv);
1076
1077	    if (alpha_tls_size == 64)
1078	      {
1079		dest = gen_reg_rtx (Pmode);
1080		emit_insn (gen_rtx_SET (dest, eqv));
1081		emit_insn (gen_adddi3 (dest, dest, scratch));
1082		return dest;
1083	      }
1084	    if (alpha_tls_size == 32)
1085	      {
1086		rtx temp = gen_rtx_HIGH (Pmode, eqv);
1087		temp = gen_rtx_PLUS (Pmode, scratch, temp);
1088		scratch = gen_reg_rtx (Pmode);
1089		emit_insn (gen_rtx_SET (scratch, temp));
1090	      }
1091	    return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1092	  }
1093
1094	case TLS_MODEL_INITIAL_EXEC:
1095	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1096	  eqv = gen_rtx_CONST (Pmode, eqv);
1097	  tp = gen_reg_rtx (Pmode);
1098	  scratch = gen_reg_rtx (Pmode);
1099	  dest = gen_reg_rtx (Pmode);
1100
1101	  emit_insn (gen_get_thread_pointerdi (tp));
1102	  emit_insn (gen_rtx_SET (scratch, eqv));
1103	  emit_insn (gen_adddi3 (dest, tp, scratch));
1104	  return dest;
1105
1106	case TLS_MODEL_LOCAL_EXEC:
1107	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1108	  eqv = gen_rtx_CONST (Pmode, eqv);
1109	  tp = gen_reg_rtx (Pmode);
1110
1111	  emit_insn (gen_get_thread_pointerdi (tp));
1112	  if (alpha_tls_size == 32)
1113	    {
1114	      rtx temp = gen_rtx_HIGH (Pmode, eqv);
1115	      temp = gen_rtx_PLUS (Pmode, tp, temp);
1116	      tp = gen_reg_rtx (Pmode);
1117	      emit_insn (gen_rtx_SET (tp, temp));
1118	    }
1119	  return gen_rtx_LO_SUM (Pmode, tp, eqv);
1120
1121	default:
1122	  gcc_unreachable ();
1123	}
1124
1125      if (local_symbolic_operand (x, Pmode))
1126	{
1127	  if (small_symbolic_operand (x, Pmode))
1128	    return x;
1129	  else
1130	    {
1131	      if (can_create_pseudo_p ())
1132	        scratch = gen_reg_rtx (Pmode);
1133	      emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1134	      return gen_rtx_LO_SUM (Pmode, scratch, x);
1135	    }
1136	}
1137    }
1138
1139  return NULL;
1140
1141 split_addend:
1142  {
1143    HOST_WIDE_INT low, high;
1144
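    /* Split ADDEND into a sign-extended 16-bit LOW part and a HIGH part
       whose low 16 bits are zero; e.g. 0x12348000 becomes 0x12350000
       plus -0x8000.  Any residue beyond 32 bits is added separately below.  */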
1145    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1146    addend -= low;
1147    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1148    addend -= high;
1149
1150    if (addend)
1151      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1152			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
1153			       1, OPTAB_LIB_WIDEN);
1154    if (high)
1155      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1156			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
1157			       1, OPTAB_LIB_WIDEN);
1158
1159    return plus_constant (Pmode, x, low);
1160  }
1161}
1162
1163
1164/* Try machine-dependent ways of modifying an illegitimate address
1165   to be legitimate.  Return X or the new, valid address.  */
1166
1167static rtx
1168alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1169			  machine_mode mode)
1170{
1171  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1172  return new_x ? new_x : x;
1173}
1174
1175/* Return true if ADDR has an effect that depends on the machine mode it
1176   is used for.  On the Alpha this is true only for the unaligned modes.
1177   We can simplify the test since we know that the address must be valid.  */
1178
1179static bool
1180alpha_mode_dependent_address_p (const_rtx addr,
1181				addr_space_t as ATTRIBUTE_UNUSED)
1182{
1183  return GET_CODE (addr) == AND;
1184}
1185
1186/* Primarily this is required for TLS symbols, but given that our move
1187   patterns *ought* to be able to handle any symbol at any time, we
1188   should never be spilling symbolic operands to the constant pool, ever.  */
1189
1190static bool
1191alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1192{
1193  enum rtx_code code = GET_CODE (x);
1194  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1195}
1196
1197/* We do not allow indirect calls to be optimized into sibling calls, nor
1198   can we allow a call to a function with a different GP to be optimized
1199   into a sibcall.  */
1200
1201static bool
1202alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1203{
1204  /* Can't do indirect tail calls, since we don't know if the target
1205     uses the same GP.  */
1206  if (!decl)
1207    return false;
1208
1209  /* Otherwise, we can make a tail call if the target function shares
1210     the same GP.  */
1211  return decl_has_samegp (decl);
1212}
1213
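/* Return true if X contains a small-data symbol reference that has not
   already been split into a LO_SUM off the global pointer.  */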
1214bool
1215some_small_symbolic_operand_int (rtx x)
1216{
1217  subrtx_var_iterator::array_type array;
1218  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1219    {
1220      rtx x = *iter;
1221      /* Don't re-split.  */
1222      if (GET_CODE (x) == LO_SUM)
1223	iter.skip_subrtxes ();
1224      else if (small_symbolic_operand (x, Pmode))
1225	return true;
1226    }
1227  return false;
1228}
1229
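/* Rewrite every small-data symbol reference in X as a LO_SUM off the global
   pointer, so that it is addressed with a gp-relative displacement rather
   than through the GOT.  */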
1230rtx
1231split_small_symbolic_operand (rtx x)
1232{
1233  x = copy_insn (x);
1234  subrtx_ptr_iterator::array_type array;
1235  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1236    {
1237      rtx *ptr = *iter;
1238      rtx x = *ptr;
1239      /* Don't re-split.  */
1240      if (GET_CODE (x) == LO_SUM)
1241	iter.skip_subrtxes ();
1242      else if (small_symbolic_operand (x, Pmode))
1243	{
1244	  *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1245	  iter.skip_subrtxes ();
1246	}
1247    }
1248  return x;
1249}
1250
1251/* Indicate that INSN cannot be duplicated.  This is true for any insn
1252   that we've marked with gpdisp relocs, since those have to stay in
1253   1-1 correspondence with one another.
1254
1255   Technically we could copy them if we could set up a mapping from one
1256   sequence number to another, across the set of insns to be duplicated.
1257   This seems overly complicated and error-prone since interblock motion
1258   from sched-ebb could move one of the pair of insns to a different block.
1259
1260   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
1261   then they'll be in a different block from their ldgp.  Which could lead
1262   the bb reorder code to think that it would be ok to copy just the block
1263   containing the call and branch to the block containing the ldgp.  */
1264
1265static bool
1266alpha_cannot_copy_insn_p (rtx_insn *insn)
1267{
1268  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1269    return false;
1270  if (recog_memoized (insn) >= 0)
1271    return get_attr_cannot_copy (insn);
1272  else
1273    return false;
1274}
1275
1276
1277/* Try a machine-dependent way of reloading an illegitimate address
1278   operand.  If we find one, push the reload and return the new rtx.  */
1279
1280rtx
1281alpha_legitimize_reload_address (rtx x,
1282				 machine_mode mode ATTRIBUTE_UNUSED,
1283				 int opnum, int type,
1284				 int ind_levels ATTRIBUTE_UNUSED)
1285{
1286  /* We must recognize output that we have already generated ourselves.  */
1287  if (GET_CODE (x) == PLUS
1288      && GET_CODE (XEXP (x, 0)) == PLUS
1289      && REG_P (XEXP (XEXP (x, 0), 0))
1290      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1291      && CONST_INT_P (XEXP (x, 1)))
1292    {
1293      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1294		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1295		   opnum, (enum reload_type) type);
1296      return x;
1297    }
1298
  /* We wish to handle large displacements off a base register by
     splitting the addend across an ldah and the mem insn.  This
     cuts the number of extra insns needed from 3 to 1.  */
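  /* E.g. a displacement of 0x8000 becomes an ldah adding 0x10000 to the
     base register, with -0x8000 left as the in-insn displacement.  */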
1302  if (GET_CODE (x) == PLUS
1303      && REG_P (XEXP (x, 0))
1304      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1305      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1306      && CONST_INT_P (XEXP (x, 1)))
1307    {
1308      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1309      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1310      HOST_WIDE_INT high
1311	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1312
1313      /* Check for 32-bit overflow.  */
1314      if (high + low != val)
1315	return NULL_RTX;
1316
1317      /* Reload the high part into a base reg; leave the low part
1318	 in the mem directly.  */
1319      x = gen_rtx_PLUS (GET_MODE (x),
1320			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1321				      GEN_INT (high)),
1322			GEN_INT (low));
1323
1324      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1325		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1326		   opnum, (enum reload_type) type);
1327      return x;
1328    }
1329
1330  return NULL_RTX;
1331}
1332
1333/* Return the cost of moving between registers of various classes.  Moving
1334   between FLOAT_REGS and anything else except float regs is expensive.
1335   In fact, we make it quite expensive because we really don't want to
1336   do these moves unless it is clearly worth it.  Optimizations may
1337   reduce the impact of not being able to allocate a pseudo to a
1338   hard register.  */
1339
1340static int
1341alpha_register_move_cost (machine_mode /*mode*/,
1342			  reg_class_t from, reg_class_t to)
1343{
1344  if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
1345    return 2;
1346
1347  if (TARGET_FIX)
1348    return (from == FLOAT_REGS) ? 6 : 8;
1349
1350  return 4 + 2 * alpha_memory_latency;
1351}
1352
1353/* Return the cost of moving data of MODE from a register to
1354   or from memory.  On the Alpha, bump this up a bit.  */
1355
1356static int
1357alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
1358			bool /*in*/)
1359{
1360  return 2 * alpha_memory_latency;
1361}
1362
1363/* Compute a (partial) cost for rtx X.  Return true if the complete
1364   cost has been computed, and false if subexpressions should be
1365   scanned.  In either case, *TOTAL contains the cost result.  */
1366
1367static bool
1368alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
1369		 bool speed)
1370{
1371  int code = GET_CODE (x);
1372  bool float_mode_p = FLOAT_MODE_P (mode);
1373  const struct alpha_rtx_cost_data *cost_data;
1374
1375  if (!speed)
1376    cost_data = &alpha_rtx_cost_size;
1377  else
1378    cost_data = &alpha_rtx_cost_data[alpha_tune];
1379
1380  switch (code)
1381    {
1382    case CONST_INT:
1383      /* If this is an 8-bit constant, return zero since it can be used
1384	 nearly anywhere with no cost.  If it is a valid operand for an
1385	 ADD or AND, likewise return 0 if we know it will be used in that
1386	 context.  Otherwise, return 2 since it might be used there later.
1387	 All other constants take at least two insns.  */
1388      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1389	{
1390	  *total = 0;
1391	  return true;
1392	}
1393      /* FALLTHRU */
1394
1395    case CONST_DOUBLE:
1396    case CONST_WIDE_INT:
1397      if (x == CONST0_RTX (mode))
1398	*total = 0;
1399      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1400	       || (outer_code == AND && and_operand (x, VOIDmode)))
1401	*total = 0;
1402      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1403	*total = 2;
1404      else
1405	*total = COSTS_N_INSNS (2);
1406      return true;
1407
1408    case CONST:
1409    case SYMBOL_REF:
1410    case LABEL_REF:
1411      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1412	*total = COSTS_N_INSNS (outer_code != MEM);
1413      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1414	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
1415      else if (tls_symbolic_operand_type (x))
1416	/* Estimate of cost for call_pal rduniq.  */
1417	/* ??? How many insns do we emit here?  More than one...  */
1418	*total = COSTS_N_INSNS (15);
1419      else
1420	/* Otherwise we do a load from the GOT.  */
1421	*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1422      return true;
1423
1424    case HIGH:
1425      /* This is effectively an add_operand.  */
1426      *total = 2;
1427      return true;
1428
1429    case PLUS:
1430    case MINUS:
1431      if (float_mode_p)
1432	*total = cost_data->fp_add;
1433      else if (GET_CODE (XEXP (x, 0)) == ASHIFT
1434	       && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1435	{
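	  /* A shift by 2 or 3 feeding the add or subtract matches the scaled
	     s4add/s8add (or s4sub/s8sub) forms, so charge one insn plus the
	     cost of the operands.  */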
1436	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
1437			      (enum rtx_code) outer_code, opno, speed)
1438		    + rtx_cost (XEXP (x, 1), mode,
1439				(enum rtx_code) outer_code, opno, speed)
1440		    + COSTS_N_INSNS (1));
1441	  return true;
1442	}
1443      return false;
1444
1445    case MULT:
1446      if (float_mode_p)
1447	*total = cost_data->fp_mult;
1448      else if (mode == DImode)
1449	*total = cost_data->int_mult_di;
1450      else
1451	*total = cost_data->int_mult_si;
1452      return false;
1453
1454    case ASHIFT:
1455      if (CONST_INT_P (XEXP (x, 1))
1456	  && INTVAL (XEXP (x, 1)) <= 3)
1457	{
1458	  *total = COSTS_N_INSNS (1);
1459	  return false;
1460	}
1461      /* FALLTHRU */
1462
1463    case ASHIFTRT:
1464    case LSHIFTRT:
1465      *total = cost_data->int_shift;
1466      return false;
1467
1468    case IF_THEN_ELSE:
1469      if (float_mode_p)
1470        *total = cost_data->fp_add;
1471      else
1472        *total = cost_data->int_cmov;
1473      return false;
1474
1475    case DIV:
1476    case UDIV:
1477    case MOD:
1478    case UMOD:
1479      if (!float_mode_p)
1480	*total = cost_data->int_div;
1481      else if (mode == SFmode)
1482        *total = cost_data->fp_div_sf;
1483      else
1484        *total = cost_data->fp_div_df;
1485      return false;
1486
1487    case MEM:
1488      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1489      return true;
1490
1491    case NEG:
1492      if (! float_mode_p)
1493	{
1494	  *total = COSTS_N_INSNS (1);
1495	  return false;
1496	}
1497      /* FALLTHRU */
1498
1499    case ABS:
1500      if (! float_mode_p)
1501	{
1502	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1503	  return false;
1504	}
1505      /* FALLTHRU */
1506
1507    case FLOAT:
1508    case UNSIGNED_FLOAT:
1509    case FIX:
1510    case UNSIGNED_FIX:
1511    case FLOAT_TRUNCATE:
1512      *total = cost_data->fp_add;
1513      return false;
1514
1515    case FLOAT_EXTEND:
1516      if (MEM_P (XEXP (x, 0)))
1517	*total = 0;
1518      else
1519	*total = cost_data->fp_add;
1520      return false;
1521
1522    default:
1523      return false;
1524    }
1525}
1526
/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  */
1531
1532void
1533get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1534{
1535  rtx base;
1536  HOST_WIDE_INT disp, offset;
1537
1538  gcc_assert (MEM_P (ref));
1539
1540  if (reload_in_progress)
1541    {
1542      base = find_replacement (&XEXP (ref, 0));
1543      gcc_assert (memory_address_p (GET_MODE (ref), base));
1544    }
1545  else
1546    base = XEXP (ref, 0);
1547
1548  if (GET_CODE (base) == PLUS)
1549    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1550  else
1551    disp = 0;
1552
1553  /* Find the byte offset within an aligned word.  If the memory itself is
1554     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
1555     will have examined the base register and determined it is aligned, and
1556     thus displacements from it are naturally alignable.  */
1557  if (MEM_ALIGN (ref) >= 32)
1558    offset = 0;
1559  else
1560    offset = disp & 3;
1561
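  /* E.g. an HImode reference 6 bytes past an unaligned base gives offset 2;
     we then access the SImode word at displacement 4 and shift by 16 bits.  */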
  /* The location should not cross an aligned word boundary.  */
1563  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1564	      <= GET_MODE_SIZE (SImode));
1565
1566  /* Access the entire aligned word.  */
1567  *paligned_mem = widen_memory_access (ref, SImode, -offset);
1568
1569  /* Convert the byte offset within the word to a bit offset.  */
1570  offset *= BITS_PER_UNIT;
1571  *pbitnum = GEN_INT (offset);
1572}
1573
/* Similar, but just get the address.  Handle the two reload cases.  */
1576
1577rtx
1578get_unaligned_address (rtx ref)
1579{
1580  rtx base;
1581  HOST_WIDE_INT offset = 0;
1582
1583  gcc_assert (MEM_P (ref));
1584
1585  if (reload_in_progress)
1586    {
1587      base = find_replacement (&XEXP (ref, 0));
1588      gcc_assert (memory_address_p (GET_MODE (ref), base));
1589    }
1590  else
1591    base = XEXP (ref, 0);
1592
1593  if (GET_CODE (base) == PLUS)
1594    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1595
1596  return plus_constant (Pmode, base, offset);
1597}
1598
1599/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1600   X is always returned in a register.  */
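/* E.g. for ADDR (plus R 10) and OFS 3 we return a register holding R + 5,
   since (10 + 3) & 7 == 5.  */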
1601
1602rtx
1603get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1604{
1605  if (GET_CODE (addr) == PLUS)
1606    {
1607      ofs += INTVAL (XEXP (addr, 1));
1608      addr = XEXP (addr, 0);
1609    }
1610
1611  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1612			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1613}
1614
1615/* On the Alpha, all (non-symbolic) constants except zero go into
1616   a floating-point register via memory.  Note that we cannot
1617   return anything that is not a subset of RCLASS, and that some
1618   symbolic constants cannot be dropped to memory.  */
1619
1620enum reg_class
1621alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1622{
1623  /* Zero is present in any register class.  */
1624  if (x == CONST0_RTX (GET_MODE (x)))
1625    return rclass;
1626
1627  /* These sorts of constants we can easily drop to memory.  */
1628  if (CONST_SCALAR_INT_P (x)
1629      || CONST_DOUBLE_P (x)
1630      || GET_CODE (x) == CONST_VECTOR)
1631    {
1632      if (rclass == FLOAT_REGS)
1633	return NO_REGS;
1634      if (rclass == ALL_REGS)
1635	return GENERAL_REGS;
1636      return rclass;
1637    }
1638
1639  /* All other kinds of constants should not (and in the case of HIGH
1640     cannot) be dropped to memory -- instead we use a GENERAL_REGS
1641     secondary reload.  */
1642  if (CONSTANT_P (x))
1643    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1644
1645  return rclass;
1646}
1647
1648/* Inform reload about cases where moving X with a mode MODE to a register in
1649   RCLASS requires an extra scratch or immediate register.  Return the class
1650   needed for the immediate register.  */
1651
1652static reg_class_t
1653alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1654			machine_mode mode, secondary_reload_info *sri)
1655{
1656  enum reg_class rclass = (enum reg_class) rclass_i;
1657
1658  /* Loading and storing HImode or QImode values to and from memory
1659     usually requires a scratch register.  */
1660  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1661    {
1662      if (any_memory_operand (x, mode))
1663	{
1664	  if (in_p)
1665	    {
1666	      if (!aligned_memory_operand (x, mode))
1667		sri->icode = direct_optab_handler (reload_in_optab, mode);
1668	    }
1669	  else
1670	    sri->icode = direct_optab_handler (reload_out_optab, mode);
1671	  return NO_REGS;
1672	}
1673    }
1674
1675  /* We also cannot do integral arithmetic into FP regs, as might result
1676     from register elimination into a DImode fp register.  */
1677  if (rclass == FLOAT_REGS)
1678    {
1679      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1680	return GENERAL_REGS;
1681      if (in_p && INTEGRAL_MODE_P (mode)
1682	  && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1683	return GENERAL_REGS;
1684    }
1685
1686  return NO_REGS;
1687}
1688
/* Given SEQ, which is an INSN list, look for any MEMs in either
   a SET_DEST or a SET_SRC and copy the volatile, notrap, and readonly
   flags from REF into each of the MEMs found.  If REF is not a MEM,
   don't do anything.  */
1693
1694void
1695alpha_set_memflags (rtx seq, rtx ref)
1696{
1697  rtx_insn *insn;
1698
1699  if (!MEM_P (ref))
1700    return;
1701
1702  /* This is only called from alpha.md, after having had something
1703     generated from one of the insn patterns.  So if everything is
1704     zero, the pattern is already up-to-date.  */
1705  if (!MEM_VOLATILE_P (ref)
1706      && !MEM_NOTRAP_P (ref)
1707      && !MEM_READONLY_P (ref))
1708    return;
1709
1710  subrtx_var_iterator::array_type array;
1711  for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1712    if (INSN_P (insn))
1713      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1714	{
1715	  rtx x = *iter;
1716	  if (MEM_P (x))
1717	    {
1718	      MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1719	      MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1720	      MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1721	      /* Sadly, we cannot use alias sets because the extra
1722		 aliasing produced by the AND interferes.  Given that
1723		 two-byte quantities are the only thing we would be
1724		 able to differentiate anyway, there does not seem to
1725		 be any point in convoluting the early out of the
1726		 alias check.  */
1727	      iter.skip_subrtxes ();
1728	    }
1729	}
1730    else
1731      gcc_unreachable ();
1732}
1733
1734static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1735				 int, bool);
1736
1737/* Internal routine for alpha_emit_set_const to check for N or below insns.
1738   If NO_OUTPUT is true, then we only check to see if N insns are possible,
1739   and return pc_rtx if successful.  */
1740
1741static rtx
1742alpha_emit_set_const_1 (rtx target, machine_mode mode,
1743			HOST_WIDE_INT c, int n, bool no_output)
1744{
1745  HOST_WIDE_INT new_const;
1746  int i, bits;
1747  /* Use a pseudo if highly optimizing and still generating RTL.  */
1748  rtx subtarget
1749    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1750  rtx temp, insn;
1751
1752  /* If this is a sign-extended 32-bit constant, we can do this in at most
1753     three insns, so do it if we have enough insns left.  */
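  /* E.g. 0x12345678 is built as (0x1234 << 16) + 0x5678 with an ldah/lda
     pair; a value such as 0x7fff8765 also needs the EXTRA adjustment below.  */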
1754
1755  if (c >> 31 == -1 || c >> 31 == 0)
1756    {
1757      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1758      HOST_WIDE_INT tmp1 = c - low;
1759      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1760      HOST_WIDE_INT extra = 0;
1761
      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldah insns.  */
1764
1765      if ((high & 0x8000) != 0 && c >= 0)
1766	{
1767	  extra = 0x4000;
1768	  tmp1 -= 0x40000000;
1769	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1770	}
1771
1772      if (c == low || (low == 0 && extra == 0))
1773	{
1774	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1775	     but that meant that we can't handle INT_MIN on 32-bit machines
1776	     (like NT/Alpha), because we recurse indefinitely through
1777	     emit_move_insn to gen_movdi.  So instead, since we know exactly
1778	     what we want, create it explicitly.  */
1779
1780	  if (no_output)
1781	    return pc_rtx;
1782	  if (target == NULL)
1783	    target = gen_reg_rtx (mode);
1784	  emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1785	  return target;
1786	}
1787      else if (n >= 2 + (extra != 0))
1788	{
1789	  if (no_output)
1790	    return pc_rtx;
1791	  if (!can_create_pseudo_p ())
1792	    {
1793	      emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1794	      temp = target;
1795	    }
1796	  else
1797	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
1798					  subtarget, mode);
1799
1800	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
1801	     This means that if we go through expand_binop, we'll try to
1802	     generate extensions, etc, which will require new pseudos, which
1803	     will fail during some split phases.  The SImode add patterns
1804	     still exist, but are not named.  So build the insns by hand.  */
1805
1806	  if (extra != 0)
1807	    {
1808	      if (! subtarget)
1809		subtarget = gen_reg_rtx (mode);
1810	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1811	      insn = gen_rtx_SET (subtarget, insn);
1812	      emit_insn (insn);
1813	      temp = subtarget;
1814	    }
1815
1816	  if (target == NULL)
1817	    target = gen_reg_rtx (mode);
1818	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1819	  insn = gen_rtx_SET (target, insn);
1820	  emit_insn (insn);
1821	  return target;
1822	}
1823    }
1824
1825  /* If we couldn't do it that way, try some other methods.  But if we have
1826     no instructions left, don't bother.  Likewise, if this is SImode and
1827     we can't make pseudos, we can't do anything since the expand_binop
1828     and expand_unop calls will widen and try to make pseudos.  */
1829
1830  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1831    return 0;
1832
1833  /* Next, see if we can load a related constant and then shift and possibly
1834     negate it to get the constant we want.  Try this once for each
1835     increasing number of insns.  */
1836
1837  for (i = 1; i < n; i++)
1838    {
1839      /* First, see if, minus some low bits, we have an easy load of
1840	 the high bits.  */
1841
1842      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1843      if (new_const != 0)
1844	{
1845          temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1846	  if (temp)
1847	    {
1848	      if (no_output)
1849		return temp;
1850	      return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1851				   target, 0, OPTAB_WIDEN);
1852	    }
1853	}
1854
1855      /* Next try complementing.  */
1856      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1857      if (temp)
1858	{
1859	  if (no_output)
1860	    return temp;
1861	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1862	}
1863
1864      /* Next try to form a constant and do a left shift.  We can do this
1865	 if some low-order bits are zero; the exact_log2 call below tells
1866	 us that information.  The bits we are shifting out could be any
1867	 value, but here we'll just try the 0- and sign-extended forms of
1868	 the constant.  To try to increase the chance of having the same
1869	 constant in more than one insn, start at the highest number of
1870	 bits to shift, but try all possibilities in case a ZAPNOT will
1871	 be useful.  */
1872
1873      bits = exact_log2 (c & -c);
1874      if (bits > 0)
1875	for (; bits > 0; bits--)
1876	  {
1877	    new_const = c >> bits;
1878	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1879	    if (!temp && c < 0)
1880	      {
1881		new_const = (unsigned HOST_WIDE_INT)c >> bits;
1882		temp = alpha_emit_set_const (subtarget, mode, new_const,
1883					     i, no_output);
1884	      }
1885	    if (temp)
1886	      {
1887		if (no_output)
1888		  return temp;
1889	        return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1890				     target, 0, OPTAB_WIDEN);
1891	      }
1892	  }
1893
1894      /* Now try high-order zero bits.  Here we try the shifted-in bits as
1895	 all zero and all ones.  Be careful to avoid shifting outside the
1896	 mode and to avoid shifting outside the host wide int size.  */
1897
1898      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1899	      - floor_log2 (c) - 1);
1900      if (bits > 0)
1901	for (; bits > 0; bits--)
1902	  {
1903	    new_const = c << bits;
1904	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1905	    if (!temp)
1906	      {
1907		new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1908	        temp = alpha_emit_set_const (subtarget, mode, new_const,
1909					     i, no_output);
1910	      }
1911	    if (temp)
1912	      {
1913		if (no_output)
1914		  return temp;
1915		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1916				     target, 1, OPTAB_WIDEN);
1917	      }
1918	  }
1919
1920      /* Now try high-order 1 bits.  We get that with a sign-extension.
1921	 But one bit isn't enough here.  Be careful to avoid shifting outside
1922	 the mode and to avoid shifting outside the host wide int size.  */
1923
1924      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1925	      - floor_log2 (~ c) - 2);
1926      if (bits > 0)
1927	for (; bits > 0; bits--)
1928	  {
1929	    new_const = c << bits;
1930	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1931	    if (!temp)
1932	      {
1933		new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1934	        temp = alpha_emit_set_const (subtarget, mode, new_const,
1935					     i, no_output);
1936	      }
1937	    if (temp)
1938	      {
1939		if (no_output)
1940		  return temp;
1941		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1942				     target, 0, OPTAB_WIDEN);
1943	      }
1944	  }
1945    }
1946
1947  /* Finally, see if we can load a value into the target that is the same as the
1948     constant except that all bytes that are 0 are changed to be 0xff.  If we
1949     can, then we can do a ZAPNOT to obtain the desired constant.  */
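  /* Illustrative example: for c = 0x000000ff000000ff the zero bytes
     become 0xff, so NEW_CONST is -1, which loads in a single insn.
     ANDing that with (c | ~new_const) -- a mask whose bytes are all
     zeros or all ones, hence a ZAPNOT -- recovers c.  */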
1950
1951  new_const = c;
1952  for (i = 0; i < 64; i += 8)
1953    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1954      new_const |= (HOST_WIDE_INT) 0xff << i;
1955
1956  /* We are only called for SImode and DImode.  If this is SImode, ensure that
1957     the constant is sign-extended to a full word.  */
1958
1959  if (mode == SImode)
1960    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1961
1962  if (new_const != c)
1963    {
1964      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1965      if (temp)
1966	{
1967	  if (no_output)
1968	    return temp;
1969	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1970			       target, 0, OPTAB_WIDEN);
1971	}
1972    }
1973
1974  return 0;
1975}
1976
1977/* Try to output insns to set TARGET equal to the constant C if it can be
1978   done in N or fewer insns.  Do all computations in MODE.  Returns the place
1979   where the output has been placed if it can be done and the insns have been
1980   emitted.  If it would take more than N insns, zero is returned and no
1981   insns are emitted.  */
1982
1983static rtx
1984alpha_emit_set_const (rtx target, machine_mode mode,
1985		      HOST_WIDE_INT c, int n, bool no_output)
1986{
1987  machine_mode orig_mode = mode;
1988  rtx orig_target = target;
1989  rtx result = 0;
1990  int i;
1991
1992  /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1993     can't load this constant in one insn, do this in DImode.  */
1994  if (!can_create_pseudo_p () && mode == SImode
1995      && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1996    {
1997      result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1998      if (result)
1999	return result;
2000
2001      target = no_output ? NULL : gen_lowpart (DImode, target);
2002      mode = DImode;
2003    }
2004  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2005    {
2006      target = no_output ? NULL : gen_lowpart (DImode, target);
2007      mode = DImode;
2008    }
2009
2010  /* Try 1 insn, then 2, then up to N.  */
2011  for (i = 1; i <= n; i++)
2012    {
2013      result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2014      if (result)
2015	{
2016	  rtx_insn *insn;
2017	  rtx set;
2018
2019	  if (no_output)
2020	    return result;
2021
2022	  insn = get_last_insn ();
2023	  set = single_set (insn);
2024	  if (! CONSTANT_P (SET_SRC (set)))
2025	    set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2026	  break;
2027	}
2028    }
2029
2030  /* Allow for the case where we changed the mode of TARGET.  */
2031  if (result)
2032    {
2033      if (result == target)
2034	result = orig_target;
2035      else if (mode != orig_mode)
2036	result = gen_lowpart (orig_mode, result);
2037    }
2038
2039  return result;
2040}
2041
2042/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2043   fall back to a straightforward decomposition.  We do this to avoid
2044   exponential run times encountered when looking for longer sequences
2045   with alpha_emit_set_const.  */
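/* An illustrative decomposition, not part of the original commentary:
   for C1 = 0x1234567887654321 the code below computes D1 = 0x4321,
   D2 = -0x789b0000, D3 = 0x5679 and D4 = 0x12340000, so the constant
   is rebuilt as ((0x12340000 + 0x5679) << 32) - 0x789b0000 + 0x4321,
   i.e. ldah/lda to form the high word, a 32-bit shift, then ldah/lda
   to fold in the low word.  */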
2046
2047static rtx
2048alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2049{
2050  HOST_WIDE_INT d1, d2, d3, d4;
2051
2052  /* Decompose the entire word.  */
2053
2054  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2055  c1 -= d1;
2056  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2057  c1 = (c1 - d2) >> 32;
2058  d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2059  c1 -= d3;
2060  d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2061  gcc_assert (c1 == d4);
2062
2063  /* Construct the high word.  */
2064  if (d4)
2065    {
2066      emit_move_insn (target, GEN_INT (d4));
2067      if (d3)
2068	emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2069    }
2070  else
2071    emit_move_insn (target, GEN_INT (d3));
2072
2073  /* Shift it into place.  */
2074  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2075
2076  /* Add in the low bits.  */
2077  if (d2)
2078    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2079  if (d1)
2080    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2081
2082  return target;
2083}
2084
2085/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits.  */
2086
2087static HOST_WIDE_INT
2088alpha_extract_integer (rtx x)
2089{
2090  if (GET_CODE (x) == CONST_VECTOR)
2091    x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2092
2093  gcc_assert (CONST_INT_P (x));
2094
2095  return INTVAL (x);
2096}
2097
2098/* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for which
2099   we are willing to load the value into a register via a move pattern.
2100   Normally this is all symbolic constants, integral constants that
2101   take three or fewer instructions, and floating-point zero.  */
2102
2103bool
2104alpha_legitimate_constant_p (machine_mode mode, rtx x)
2105{
2106  HOST_WIDE_INT i0;
2107
2108  switch (GET_CODE (x))
2109    {
2110    case LABEL_REF:
2111    case HIGH:
2112      return true;
2113
2114    case CONST:
2115      if (GET_CODE (XEXP (x, 0)) == PLUS
2116	  && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2117	x = XEXP (XEXP (x, 0), 0);
2118      else
2119	return true;
2120
2121      if (GET_CODE (x) != SYMBOL_REF)
2122	return true;
2123      /* FALLTHRU */
2124
2125    case SYMBOL_REF:
2126      /* TLS symbols are never valid.  */
2127      return SYMBOL_REF_TLS_MODEL (x) == 0;
2128
2129    case CONST_WIDE_INT:
2130      if (TARGET_BUILD_CONSTANTS)
2131	return true;
2132      if (x == CONST0_RTX (mode))
2133	return true;
2134      mode = DImode;
2135      gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2136      i0 = CONST_WIDE_INT_ELT (x, 1);
2137      if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2138	return false;
2139      i0 = CONST_WIDE_INT_ELT (x, 0);
2140      goto do_integer;
2141
2142    case CONST_DOUBLE:
2143      if (x == CONST0_RTX (mode))
2144	return true;
2145      return false;
2146
2147    case CONST_VECTOR:
2148      if (x == CONST0_RTX (mode))
2149	return true;
2150      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2151	return false;
2152      if (GET_MODE_SIZE (mode) != 8)
2153	return false;
2154      /* FALLTHRU */
2155
2156    case CONST_INT:
2157      if (TARGET_BUILD_CONSTANTS)
2158	return true;
2159      i0 = alpha_extract_integer (x);
2160    do_integer:
2161      return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2162
2163    default:
2164      return false;
2165    }
2166}
2167
2168/* Operand 1 is known to be a constant, and should require more than one
2169   instruction to load.  Emit that multi-part load.  */
2170
2171bool
2172alpha_split_const_mov (machine_mode mode, rtx *operands)
2173{
2174  HOST_WIDE_INT i0;
2175  rtx temp = NULL_RTX;
2176
2177  i0 = alpha_extract_integer (operands[1]);
2178
2179  temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2180
2181  if (!temp && TARGET_BUILD_CONSTANTS)
2182    temp = alpha_emit_set_long_const (operands[0], i0);
2183
2184  if (temp)
2185    {
2186      if (!rtx_equal_p (operands[0], temp))
2187	emit_move_insn (operands[0], temp);
2188      return true;
2189    }
2190
2191  return false;
2192}
2193
2194/* Expand a move instruction; return true if all work is done.
2195   We don't handle non-bwx subword loads here.  */
2196
2197bool
2198alpha_expand_mov (machine_mode mode, rtx *operands)
2199{
2200  rtx tmp;
2201
2202  /* If the output is not a register, the input must be.  */
2203  if (MEM_P (operands[0])
2204      && ! reg_or_0_operand (operands[1], mode))
2205    operands[1] = force_reg (mode, operands[1]);
2206
2207  /* Allow legitimize_address to perform some simplifications.  */
2208  if (mode == Pmode && symbolic_operand (operands[1], mode))
2209    {
2210      tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2211      if (tmp)
2212	{
2213	  if (tmp == operands[0])
2214	    return true;
2215	  operands[1] = tmp;
2216	  return false;
2217	}
2218    }
2219
2220  /* Early out for non-constants and valid constants.  */
2221  if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2222    return false;
2223
2224  /* Split large integers.  */
2225  if (CONST_INT_P (operands[1])
2226      || GET_CODE (operands[1]) == CONST_VECTOR)
2227    {
2228      if (alpha_split_const_mov (mode, operands))
2229	return true;
2230    }
2231
2232  /* Otherwise we've nothing left but to drop the thing to memory.  */
2233  tmp = force_const_mem (mode, operands[1]);
2234
2235  if (tmp == NULL_RTX)
2236    return false;
2237
2238  if (reload_in_progress)
2239    {
2240      emit_move_insn (operands[0], XEXP (tmp, 0));
2241      operands[1] = replace_equiv_address (tmp, operands[0]);
2242    }
2243  else
2244    operands[1] = validize_mem (tmp);
2245  return false;
2246}
2247
2248/* Expand a non-bwx QImode or HImode move instruction;
2249   return true if all work is done.  */
2250
2251bool
2252alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2253{
2254  rtx seq;
2255
2256  /* If the output is not a register, the input must be.  */
2257  if (MEM_P (operands[0]))
2258    operands[1] = force_reg (mode, operands[1]);
2259
2260  /* Handle four memory cases, unaligned and aligned for either the input
2261     or the output.  The only case where we can be called during reload is
2262     for aligned loads; all other cases require temporaries.  */
2263
2264  if (any_memory_operand (operands[1], mode))
2265    {
2266      if (aligned_memory_operand (operands[1], mode))
2267	{
2268	  if (reload_in_progress)
2269	    {
2270	      if (mode == QImode)
2271		seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2272	      else
2273		seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2274	      emit_insn (seq);
2275	    }
2276	  else
2277	    {
2278	      rtx aligned_mem, bitnum;
2279	      rtx scratch = gen_reg_rtx (SImode);
2280	      rtx subtarget;
2281	      bool copyout;
2282
2283	      get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2284
2285	      subtarget = operands[0];
2286	      if (REG_P (subtarget))
2287		subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2288	      else
2289		subtarget = gen_reg_rtx (DImode), copyout = true;
2290
2291	      if (mode == QImode)
2292		seq = gen_aligned_loadqi (subtarget, aligned_mem,
2293					  bitnum, scratch);
2294	      else
2295		seq = gen_aligned_loadhi (subtarget, aligned_mem,
2296					  bitnum, scratch);
2297	      emit_insn (seq);
2298
2299	      if (copyout)
2300		emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2301	    }
2302	}
2303      else
2304	{
2305	  /* Don't pass these as parameters since that makes the generated
2306	     code depend on parameter evaluation order which will cause
2307	     bootstrap failures.  */
2308
2309	  rtx temp1, temp2, subtarget, ua;
2310	  bool copyout;
2311
2312	  temp1 = gen_reg_rtx (DImode);
2313	  temp2 = gen_reg_rtx (DImode);
2314
2315	  subtarget = operands[0];
2316	  if (REG_P (subtarget))
2317	    subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2318	  else
2319	    subtarget = gen_reg_rtx (DImode), copyout = true;
2320
2321	  ua = get_unaligned_address (operands[1]);
2322	  if (mode == QImode)
2323	    seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2324	  else
2325	    seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2326
2327	  alpha_set_memflags (seq, operands[1]);
2328	  emit_insn (seq);
2329
2330	  if (copyout)
2331	    emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2332	}
2333      return true;
2334    }
2335
2336  if (any_memory_operand (operands[0], mode))
2337    {
2338      if (aligned_memory_operand (operands[0], mode))
2339	{
2340	  rtx aligned_mem, bitnum;
2341	  rtx temp1 = gen_reg_rtx (SImode);
2342	  rtx temp2 = gen_reg_rtx (SImode);
2343
2344	  get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2345
2346	  emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2347					temp1, temp2));
2348	}
2349      else
2350	{
2351	  rtx temp1 = gen_reg_rtx (DImode);
2352	  rtx temp2 = gen_reg_rtx (DImode);
2353	  rtx temp3 = gen_reg_rtx (DImode);
2354	  rtx ua = get_unaligned_address (operands[0]);
2355
2356	  if (mode == QImode)
2357	    seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2358	  else
2359	    seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2360
2361	  alpha_set_memflags (seq, operands[0]);
2362	  emit_insn (seq);
2363	}
2364      return true;
2365    }
2366
2367  return false;
2368}
2369
2370/* Implement the movmisalign patterns.  One of the operands is a memory
2371   that is not naturally aligned.  Emit instructions to load it.  */
2372
2373void
2374alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2375{
2376  /* Honor the misaligned loads that we promised to handle.  */
2377  if (MEM_P (operands[1]))
2378    {
2379      rtx tmp;
2380
2381      if (register_operand (operands[0], mode))
2382	tmp = operands[0];
2383      else
2384	tmp = gen_reg_rtx (mode);
2385
2386      alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2387      if (tmp != operands[0])
2388	emit_move_insn (operands[0], tmp);
2389    }
2390  else if (MEM_P (operands[0]))
2391    {
2392      if (!reg_or_0_operand (operands[1], mode))
2393	operands[1] = force_reg (mode, operands[1]);
2394      alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2395    }
2396  else
2397    gcc_unreachable ();
2398}
2399
2400/* Generate an unsigned DImode to FP conversion.  This is the same code
2401   optabs would emit if we didn't have TFmode patterns.
2402
2403   For SFmode, this is the only construction I've found that can pass
2404   gcc.c-torture/execute/ieee/rbug.c.  No scenario that uses DFmode
2405   intermediates will work, because you'll get intermediate rounding
2406   that ruins the end result.  Some of this could be fixed by turning
2407   on round-to-positive-infinity, but that requires diddling the fpsr,
2408   which kills performance.  I tried turning this around and converting
2409   to a negative number, so that I could turn on /m, but either I did
2410   wrong or there's something else going on, because I wound up with the exact
2411   same single-bit error.  There is a branch-less form of this same code:
2412
2413	srl     $16,1,$1
2414	and     $16,1,$2
2415	cmplt   $16,0,$3
2416	or      $1,$2,$2
2417	cmovge  $16,$16,$2
2418	itoft	$3,$f10
2419	itoft	$2,$f11
2420	cvtqs   $f11,$f11
2421	adds    $f11,$f11,$f0
2422	fcmoveq $f10,$f11,$f0
2423
2424   I'm not using it because it's the same number of instructions as
2425   this branch-full form, and it has more serialized long latency
2426   instructions on the critical path.
2427
2428   For DFmode, we can avoid rounding errors by breaking up the word
2429   into two pieces, converting them separately, and adding them back:
2430
2431   LC0: .long 0,0x5f800000
2432
2433	itoft	$16,$f11
2434	lda	$2,LC0
2435	cmplt	$16,0,$1
2436	cpyse	$f11,$f31,$f10
2437	cpyse	$f31,$f11,$f11
2438	s4addq	$1,$2,$1
2439	lds	$f12,0($1)
2440	cvtqt	$f10,$f10
2441	cvtqt	$f11,$f11
2442	addt	$f12,$f10,$f0
2443	addt	$f0,$f11,$f0
2444
2445   This doesn't seem to be a clear-cut win over the optabs form.
2446   It probably all depends on the distribution of numbers being
2447   converted -- in the optabs form, all but high-bit-set has a
2448   much lower minimum execution time.  */
2449
2450void
2451alpha_emit_floatuns (rtx operands[2])
2452{
2453  rtx neglab, donelab, i0, i1, f0, in, out;
2454  machine_mode mode;
2455
2456  out = operands[0];
2457  in = force_reg (DImode, operands[1]);
2458  mode = GET_MODE (out);
2459  neglab = gen_label_rtx ();
2460  donelab = gen_label_rtx ();
2461  i0 = gen_reg_rtx (DImode);
2462  i1 = gen_reg_rtx (DImode);
2463  f0 = gen_reg_rtx (mode);
2464
2465  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2466
2467  emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2468  emit_jump_insn (gen_jump (donelab));
2469  emit_barrier ();
2470
2471  emit_label (neglab);
2472
2473  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2474  emit_insn (gen_anddi3 (i1, in, const1_rtx));
2475  emit_insn (gen_iordi3 (i0, i0, i1));
2476  emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2477  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2478
2479  emit_label (donelab);
2480}
2481
2482/* Generate the comparison for a conditional branch.  */
2483
2484void
2485alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2486{
2487  enum rtx_code cmp_code, branch_code;
2488  machine_mode branch_mode = VOIDmode;
2489  enum rtx_code code = GET_CODE (operands[0]);
2490  rtx op0 = operands[1], op1 = operands[2];
2491  rtx tem;
2492
2493  if (cmp_mode == TFmode)
2494    {
2495      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2496      op1 = const0_rtx;
2497      cmp_mode = DImode;
2498    }
2499
2500  /* The general case: fold the comparison code to the types of compares
2501     that we have, choosing the branch as necessary.  */
2502  switch (code)
2503    {
2504    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2505    case UNORDERED:
2506      /* We have these compares.  */
2507      cmp_code = code, branch_code = NE;
2508      break;
2509
2510    case NE:
2511    case ORDERED:
2512      /* These must be reversed.  */
2513      cmp_code = reverse_condition (code), branch_code = EQ;
2514      break;
2515
2516    case GE:  case GT: case GEU:  case GTU:
2517      /* For FP, we swap them, for INT, we reverse them.  */
2518      if (cmp_mode == DFmode)
2519	{
2520	  cmp_code = swap_condition (code);
2521	  branch_code = NE;
2522	  std::swap (op0, op1);
2523	}
2524      else
2525	{
2526	  cmp_code = reverse_condition (code);
2527	  branch_code = EQ;
2528	}
2529      break;
2530
2531    default:
2532      gcc_unreachable ();
2533    }
2534
2535  if (cmp_mode == DFmode)
2536    {
2537      if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2538	{
2539	  /* When we are not as concerned about non-finite values, and we
2540	     are comparing against zero, we can branch directly.  */
2541	  if (op1 == CONST0_RTX (DFmode))
2542	    cmp_code = UNKNOWN, branch_code = code;
2543	  else if (op0 == CONST0_RTX (DFmode))
2544	    {
2545	      /* Undo the swap we probably did just above.  */
2546	      std::swap (op0, op1);
2547	      branch_code = swap_condition (cmp_code);
2548	      cmp_code = UNKNOWN;
2549	    }
2550	}
2551      else
2552	{
2553	  /* ??? We mark the branch mode to be CCmode to prevent the
2554	     compare and branch from being combined, since the compare
2555	     insn follows IEEE rules that the branch does not.  */
2556	  branch_mode = CCmode;
2557	}
2558    }
2559  else
2560    {
2561      /* The following optimizations are only for signed compares.  */
2562      if (code != LEU && code != LTU && code != GEU && code != GTU)
2563	{
2564	  /* Whee.  Compare and branch against 0 directly.  */
2565	  if (op1 == const0_rtx)
2566	    cmp_code = UNKNOWN, branch_code = code;
2567
2568	  /* If the constant doesn't fit into an immediate, but can
2569 	     be generated by lda/ldah, we adjust the argument and
2570 	     compare against zero, so we can use beq/bne directly.  */
2571	  /* ??? Don't do this when comparing against symbols, otherwise
2572	     we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2573	     be declared false out of hand (at least for non-weak).  */
2574	  else if (CONST_INT_P (op1)
2575		   && (code == EQ || code == NE)
2576		   && !(symbolic_operand (op0, VOIDmode)
2577			|| (REG_P (op0) && REG_POINTER (op0))))
2578	    {
2579	      rtx n_op1 = GEN_INT (-INTVAL (op1));
2580
2581	      if (! satisfies_constraint_I (op1)
2582		  && (satisfies_constraint_K (n_op1)
2583		      || satisfies_constraint_L (n_op1)))
2584		cmp_code = PLUS, branch_code = code, op1 = n_op1;
2585	    }
2586	}
2587
2588      if (!reg_or_0_operand (op0, DImode))
2589	op0 = force_reg (DImode, op0);
2590      if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2591	op1 = force_reg (DImode, op1);
2592    }
2593
2594  /* Emit an initial compare instruction, if necessary.  */
2595  tem = op0;
2596  if (cmp_code != UNKNOWN)
2597    {
2598      tem = gen_reg_rtx (cmp_mode);
2599      emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2600    }
2601
2602  /* Emit the branch instruction.  */
2603  tem = gen_rtx_SET (pc_rtx,
2604		     gen_rtx_IF_THEN_ELSE (VOIDmode,
2605					   gen_rtx_fmt_ee (branch_code,
2606							   branch_mode, tem,
2607							   CONST0_RTX (cmp_mode)),
2608					   gen_rtx_LABEL_REF (VOIDmode,
2609							      operands[3]),
2610					   pc_rtx));
2611  emit_jump_insn (tem);
2612}
2613
2614/* Certain simplifications can be done to make invalid setcc operations
2615   valid.  Return true on success, false if we can't handle the comparison.  */
2616
2617bool
2618alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2619{
2620  enum rtx_code cmp_code;
2621  enum rtx_code code = GET_CODE (operands[1]);
2622  rtx op0 = operands[2], op1 = operands[3];
2623  rtx tmp;
2624
2625  if (cmp_mode == TFmode)
2626    {
2627      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2628      op1 = const0_rtx;
2629      cmp_mode = DImode;
2630    }
2631
2632  if (cmp_mode == DFmode && !TARGET_FIX)
2633    return 0;
2634
2635  /* The general case: fold the comparison code to the types of compares
2636     that we have, choosing the branch as necessary.  */
2637
2638  cmp_code = UNKNOWN;
2639  switch (code)
2640    {
2641    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2642    case UNORDERED:
2643      /* We have these compares.  */
2644      if (cmp_mode == DFmode)
2645	cmp_code = code, code = NE;
2646      break;
2647
2648    case NE:
2649      if (cmp_mode == DImode && op1 == const0_rtx)
2650	break;
2651      /* FALLTHRU */
2652
2653    case ORDERED:
2654      cmp_code = reverse_condition (code);
2655      code = EQ;
2656      break;
2657
2658    case GE:  case GT: case GEU:  case GTU:
2659      /* These normally need swapping, but for integer zero we have
2660	 special patterns that recognize swapped operands.  */
2661      if (cmp_mode == DImode && op1 == const0_rtx)
2662	break;
2663      code = swap_condition (code);
2664      if (cmp_mode == DFmode)
2665	cmp_code = code, code = NE;
2666      std::swap (op0, op1);
2667      break;
2668
2669    default:
2670      gcc_unreachable ();
2671    }
2672
2673  if (cmp_mode == DImode)
2674    {
2675      if (!register_operand (op0, DImode))
2676	op0 = force_reg (DImode, op0);
2677      if (!reg_or_8bit_operand (op1, DImode))
2678	op1 = force_reg (DImode, op1);
2679    }
2680
2681  /* Emit an initial compare instruction, if necessary.  */
2682  if (cmp_code != UNKNOWN)
2683    {
2684      tmp = gen_reg_rtx (cmp_mode);
2685      emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2686						   op0, op1)));
2687
2688      op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2689      op1 = const0_rtx;
2690    }
2691
2692  /* Emit the setcc instruction.  */
2693  emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2694						       op0, op1)));
2695  return true;
2696}
2697
2698
2699/* Rewrite a comparison against zero CMP of the form
2700   (CODE (cc0) (const_int 0)) so it can be written validly in
2701   a conditional move (if_then_else CMP ...).
2702   If both of the operands that set cc0 are nonzero we must emit
2703   an insn to perform the compare (it can't be done within
2704   the conditional move).  */
2705
2706rtx
2707alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2708{
2709  enum rtx_code code = GET_CODE (cmp);
2710  enum rtx_code cmov_code = NE;
2711  rtx op0 = XEXP (cmp, 0);
2712  rtx op1 = XEXP (cmp, 1);
2713  machine_mode cmp_mode
2714    = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2715  machine_mode cmov_mode = VOIDmode;
2716  int local_fast_math = flag_unsafe_math_optimizations;
2717  rtx tem;
2718
2719  if (cmp_mode == TFmode)
2720    {
2721      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2722      op1 = const0_rtx;
2723      cmp_mode = DImode;
2724    }
2725
2726  gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2727
2728  if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2729    {
2730      enum rtx_code cmp_code;
2731
2732      if (! TARGET_FIX)
2733	return 0;
2734
2735      /* If we have fp<->int register move instructions, do a cmov by
2736	 performing the comparison in fp registers, and move the
2737	 zero/nonzero value to integer registers, where we can then
2738	 use a normal cmov, or vice-versa.  */
2739
2740      switch (code)
2741	{
2742	case EQ: case LE: case LT: case LEU: case LTU:
2743	case UNORDERED:
2744	  /* We have these compares.  */
2745	  cmp_code = code, code = NE;
2746	  break;
2747
2748	case NE:
2749	case ORDERED:
2750	  /* These must be reversed.  */
2751	  cmp_code = reverse_condition (code), code = EQ;
2752	  break;
2753
2754	case GE: case GT: case GEU: case GTU:
2755	  /* These normally need swapping, but for integer zero we have
2756	     special patterns that recognize swapped operands.  */
2757	  if (cmp_mode == DImode && op1 == const0_rtx)
2758	    cmp_code = code, code = NE;
2759	  else
2760	    {
2761	      cmp_code = swap_condition (code);
2762	      code = NE;
2763	      std::swap (op0, op1);
2764	    }
2765	  break;
2766
2767	default:
2768	  gcc_unreachable ();
2769	}
2770
2771      if (cmp_mode == DImode)
2772	{
2773	  if (!reg_or_0_operand (op0, DImode))
2774	    op0 = force_reg (DImode, op0);
2775	  if (!reg_or_8bit_operand (op1, DImode))
2776	    op1 = force_reg (DImode, op1);
2777	}
2778
2779      tem = gen_reg_rtx (cmp_mode);
2780      emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2781						   op0, op1)));
2782
2783      cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2784      op0 = gen_lowpart (cmp_mode, tem);
2785      op1 = CONST0_RTX (cmp_mode);
2786      cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2787      local_fast_math = 1;
2788    }
2789
2790  if (cmp_mode == DImode)
2791    {
2792      if (!reg_or_0_operand (op0, DImode))
2793	op0 = force_reg (DImode, op0);
2794      if (!reg_or_8bit_operand (op1, DImode))
2795	op1 = force_reg (DImode, op1);
2796    }
2797
2798  /* We may be able to use a conditional move directly.
2799     This avoids emitting spurious compares.  */
2800  if (signed_comparison_operator (cmp, VOIDmode)
2801      && (cmp_mode == DImode || local_fast_math)
2802      && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2803    return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2804
2805  /* We can't put the comparison inside the conditional move;
2806     emit a compare instruction and put that inside the
2807     conditional move.  Make sure we emit only comparisons we have;
2808     swap or reverse as necessary.  */
2809
2810  if (!can_create_pseudo_p ())
2811    return NULL_RTX;
2812
2813  switch (code)
2814    {
2815    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2816    case UNORDERED:
2817      /* We have these compares: */
2818      break;
2819
2820    case NE:
2821    case ORDERED:
2822      /* These must be reversed.  */
2823      code = reverse_condition (code);
2824      cmov_code = EQ;
2825      break;
2826
2827    case GE:  case GT:  case GEU:  case GTU:
2828      /* These normally need swapping, but for integer zero we have
2829	 special patterns that recognize swapped operands.  */
2830      if (cmp_mode == DImode && op1 == const0_rtx)
2831	break;
2832      code = swap_condition (code);
2833      std::swap (op0, op1);
2834      break;
2835
2836    default:
2837      gcc_unreachable ();
2838    }
2839
2840  if (cmp_mode == DImode)
2841    {
2842      if (!reg_or_0_operand (op0, DImode))
2843	op0 = force_reg (DImode, op0);
2844      if (!reg_or_8bit_operand (op1, DImode))
2845	op1 = force_reg (DImode, op1);
2846    }
2847
2848  /* ??? We mark the branch mode to be CCmode to prevent the compare
2849     and cmov from being combined, since the compare insn follows IEEE
2850     rules that the cmov does not.  */
2851  if (cmp_mode == DFmode && !local_fast_math)
2852    cmov_mode = CCmode;
2853
2854  tem = gen_reg_rtx (cmp_mode);
2855  emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2856  return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2857}
2858
2859/* Simplify a conditional move of two constants into a setcc with
2860   arithmetic.  This is done with a splitter since combine would
2861   just undo the work if done during code generation.  It also catches
2862   cases we wouldn't have before cse.  */
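/* Two illustrative cases (assumed, not taken from the original notes):
   dest = (cond ? 8 : 0) splits into "tmp = cond; dest = tmp << 3",
   a setcc plus a shift, while dest = (cond ? 6 : 2), with DIFF == 4,
   becomes a setcc followed by an s4addq-style "dest = (tmp << 2) + 2".  */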
2863
2864int
2865alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2866			      rtx t_rtx, rtx f_rtx)
2867{
2868  HOST_WIDE_INT t, f, diff;
2869  machine_mode mode;
2870  rtx target, subtarget, tmp;
2871
2872  mode = GET_MODE (dest);
2873  t = INTVAL (t_rtx);
2874  f = INTVAL (f_rtx);
2875  diff = t - f;
2876
2877  if (((code == NE || code == EQ) && diff < 0)
2878      || (code == GE || code == GT))
2879    {
2880      code = reverse_condition (code);
2881      diff = t, t = f, f = diff;
2882      diff = t - f;
2883    }
2884
2885  subtarget = target = dest;
2886  if (mode != DImode)
2887    {
2888      target = gen_lowpart (DImode, dest);
2889      if (can_create_pseudo_p ())
2890        subtarget = gen_reg_rtx (DImode);
2891      else
2892	subtarget = target;
2893    }
2894  /* Below, we must be careful to use copy_rtx on target and subtarget
2895     in intermediate insns, as they may be a subreg rtx, which may not
2896     be shared.  */
2897
2898  if (f == 0 && exact_log2 (diff) > 0
2899      /* On EV6, we've got enough shifters to make non-arithmetic shifts
2900	 viable over a longer latency cmove.  On EV5, the E0 slot is a
2901	 scarce resource, and on EV4 shift has the same latency as a cmove.  */
2902      && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2903    {
2904      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2905      emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2906
2907      tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2908			    GEN_INT (exact_log2 (t)));
2909      emit_insn (gen_rtx_SET (target, tmp));
2910    }
2911  else if (f == 0 && t == -1)
2912    {
2913      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2914      emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2915
2916      emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2917    }
2918  else if (diff == 1 || diff == 4 || diff == 8)
2919    {
2920      rtx add_op;
2921
2922      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2923      emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2924
2925      if (diff == 1)
2926	emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2927      else
2928	{
2929	  add_op = GEN_INT (f);
2930	  if (sext_add_operand (add_op, mode))
2931	    {
2932	      tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2933				    GEN_INT (exact_log2 (diff)));
2934	      tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2935	      emit_insn (gen_rtx_SET (target, tmp));
2936	    }
2937	  else
2938	    return 0;
2939	}
2940    }
2941  else
2942    return 0;
2943
2944  return 1;
2945}
2946
2947/* Look up the function X_floating library function name for the
2948   given operation.  */
2949
2950struct GTY(()) xfloating_op
2951{
2952  const enum rtx_code code;
2953  const char *const GTY((skip)) osf_func;
2954  const char *const GTY((skip)) vms_func;
2955  rtx libcall;
2956};
2957
2958static GTY(()) struct xfloating_op xfloating_ops[] =
2959{
2960  { PLUS,		"_OtsAddX", "OTS$ADD_X", 0 },
2961  { MINUS,		"_OtsSubX", "OTS$SUB_X", 0 },
2962  { MULT,		"_OtsMulX", "OTS$MUL_X", 0 },
2963  { DIV,		"_OtsDivX", "OTS$DIV_X", 0 },
2964  { EQ,			"_OtsEqlX", "OTS$EQL_X", 0 },
2965  { NE,			"_OtsNeqX", "OTS$NEQ_X", 0 },
2966  { LT,			"_OtsLssX", "OTS$LSS_X", 0 },
2967  { LE,			"_OtsLeqX", "OTS$LEQ_X", 0 },
2968  { GT,			"_OtsGtrX", "OTS$GTR_X", 0 },
2969  { GE,			"_OtsGeqX", "OTS$GEQ_X", 0 },
2970  { FIX,		"_OtsCvtXQ", "OTS$CVTXQ", 0 },
2971  { FLOAT,		"_OtsCvtQX", "OTS$CVTQX", 0 },
2972  { UNSIGNED_FLOAT,	"_OtsCvtQUX", "OTS$CVTQUX", 0 },
2973  { FLOAT_EXTEND,	"_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2974  { FLOAT_TRUNCATE,	"_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2975};
2976
2977static GTY(()) struct xfloating_op vax_cvt_ops[] =
2978{
2979  { FLOAT_EXTEND,	"_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2980  { FLOAT_TRUNCATE,	"_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2981};
2982
2983static rtx
2984alpha_lookup_xfloating_lib_func (enum rtx_code code)
2985{
2986  struct xfloating_op *ops = xfloating_ops;
2987  long n = ARRAY_SIZE (xfloating_ops);
2988  long i;
2989
2990  gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2991
2992  /* How irritating.  Nothing to key off for the main table.  */
2993  if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2994    {
2995      ops = vax_cvt_ops;
2996      n = ARRAY_SIZE (vax_cvt_ops);
2997    }
2998
2999  for (i = 0; i < n; ++i, ++ops)
3000    if (ops->code == code)
3001      {
3002	rtx func = ops->libcall;
3003	if (!func)
3004	  {
3005	    func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3006				     ? ops->vms_func : ops->osf_func);
3007	    ops->libcall = func;
3008	  }
3009        return func;
3010      }
3011
3012  gcc_unreachable ();
3013}
3014
3015/* Most X_floating operations take the rounding mode as an argument.
3016   Compute that here.  */
3017
3018static int
3019alpha_compute_xfloating_mode_arg (enum rtx_code code,
3020				  enum alpha_fp_rounding_mode round)
3021{
3022  int mode;
3023
3024  switch (round)
3025    {
3026    case ALPHA_FPRM_NORM:
3027      mode = 2;
3028      break;
3029    case ALPHA_FPRM_MINF:
3030      mode = 1;
3031      break;
3032    case ALPHA_FPRM_CHOP:
3033      mode = 0;
3034      break;
3035    case ALPHA_FPRM_DYN:
3036      mode = 4;
3037      break;
3038    default:
3039      gcc_unreachable ();
3040
3041    /* XXX For reference, round to +inf is mode = 3.  */
3042    }
3043
3044  if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3045    mode |= 0x10000;
3046
3047  return mode;
3048}
3049
3050/* Emit an X_floating library function call.
3051
3052   Note that these functions do not follow normal calling conventions:
3053   TFmode arguments are passed in two integer registers (as opposed to
3054   indirect); TFmode return values appear in R16+R17.
3055
3056   FUNC is the function to call.
3057   TARGET is where the output belongs.
3058   OPERANDS are the inputs.
3059   NOPERANDS is the count of inputs.
3060   EQUIV is the expression equivalent for the function.
3061*/
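/* As a concrete illustration of the register assignment performed below:
   for the three-operand arithmetic calls built by
   alpha_emit_xfloating_arith, the first TFmode operand is passed in
   $16/$17, the second in $18/$19, the rounding-mode word in $20, and
   the TFmode result comes back in $16/$17.  */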
3062
3063static void
3064alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3065			      int noperands, rtx equiv)
3066{
3067  rtx usage = NULL_RTX, reg;
3068  int regno = 16, i;
3069
3070  start_sequence ();
3071
3072  for (i = 0; i < noperands; ++i)
3073    {
3074      switch (GET_MODE (operands[i]))
3075	{
3076	case TFmode:
3077	  reg = gen_rtx_REG (TFmode, regno);
3078	  regno += 2;
3079	  break;
3080
3081	case DFmode:
3082	  reg = gen_rtx_REG (DFmode, regno + 32);
3083	  regno += 1;
3084	  break;
3085
3086	case VOIDmode:
3087	  gcc_assert (CONST_INT_P (operands[i]));
3088	  /* FALLTHRU */
3089	case DImode:
3090	  reg = gen_rtx_REG (DImode, regno);
3091	  regno += 1;
3092	  break;
3093
3094	default:
3095	  gcc_unreachable ();
3096	}
3097
3098      emit_move_insn (reg, operands[i]);
3099      use_reg (&usage, reg);
3100    }
3101
3102  switch (GET_MODE (target))
3103    {
3104    case TFmode:
3105      reg = gen_rtx_REG (TFmode, 16);
3106      break;
3107    case DFmode:
3108      reg = gen_rtx_REG (DFmode, 32);
3109      break;
3110    case DImode:
3111      reg = gen_rtx_REG (DImode, 0);
3112      break;
3113    default:
3114      gcc_unreachable ();
3115    }
3116
3117  rtx mem = gen_rtx_MEM (QImode, func);
3118  rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3119						  const0_rtx, const0_rtx));
3120  CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3121  RTL_CONST_CALL_P (tmp) = 1;
3122
3123  tmp = get_insns ();
3124  end_sequence ();
3125
3126  emit_libcall_block (tmp, target, reg, equiv);
3127}
3128
3129/* Emit an X_floating library function call for arithmetic (+,-,*,/).  */
3130
3131void
3132alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3133{
3134  rtx func;
3135  int mode;
3136  rtx out_operands[3];
3137
3138  func = alpha_lookup_xfloating_lib_func (code);
3139  mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3140
3141  out_operands[0] = operands[1];
3142  out_operands[1] = operands[2];
3143  out_operands[2] = GEN_INT (mode);
3144  alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3145				gen_rtx_fmt_ee (code, TFmode, operands[1],
3146						operands[2]));
3147}
3148
3149/* Emit an X_floating library function call for a comparison.  */
3150
3151static rtx
3152alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3153{
3154  enum rtx_code cmp_code, res_code;
3155  rtx func, out, operands[2], note;
3156
3157  /* X_floating library comparison functions return
3158	   -1  unordered
3159	    0  false
3160	    1  true
3161     Convert the compare against the raw return value.  */
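  /* For instance, UNORDERED is implemented by calling the EQ routine
     and testing its result with LT, since only an unordered pair makes
     the library return -1; the ordinary relations (EQ, LT, LE, GT, GE)
     call the corresponding routine and test for a positive result.  */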
3162
3163  cmp_code = *pcode;
3164  switch (cmp_code)
3165    {
3166    case UNORDERED:
3167      cmp_code = EQ;
3168      res_code = LT;
3169      break;
3170    case ORDERED:
3171      cmp_code = EQ;
3172      res_code = GE;
3173      break;
3174    case NE:
3175      res_code = NE;
3176      break;
3177    case EQ:
3178    case LT:
3179    case GT:
3180    case LE:
3181    case GE:
3182      res_code = GT;
3183      break;
3184    default:
3185      gcc_unreachable ();
3186    }
3187  *pcode = res_code;
3188
3189  func = alpha_lookup_xfloating_lib_func (cmp_code);
3190
3191  operands[0] = op0;
3192  operands[1] = op1;
3193  out = gen_reg_rtx (DImode);
3194
3195  /* What's actually returned is -1,0,1, not a proper boolean value.  */
3196  note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3197  note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3198  alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3199
3200  return out;
3201}
3202
3203/* Emit an X_floating library function call for a conversion.  */
3204
3205void
3206alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3207{
3208  int noperands = 1, mode;
3209  rtx out_operands[2];
3210  rtx func;
3211  enum rtx_code code = orig_code;
3212
3213  if (code == UNSIGNED_FIX)
3214    code = FIX;
3215
3216  func = alpha_lookup_xfloating_lib_func (code);
3217
3218  out_operands[0] = operands[1];
3219
3220  switch (code)
3221    {
3222    case FIX:
3223      mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3224      out_operands[1] = GEN_INT (mode);
3225      noperands = 2;
3226      break;
3227    case FLOAT_TRUNCATE:
3228      mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3229      out_operands[1] = GEN_INT (mode);
3230      noperands = 2;
3231      break;
3232    default:
3233      break;
3234    }
3235
3236  alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3237				gen_rtx_fmt_e (orig_code,
3238					       GET_MODE (operands[0]),
3239					       operands[1]));
3240}
3241
3242/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3243   DImode moves from OP[2,3] to OP[0,1].  If FIXUP_OVERLAP is true,
3244   guarantee that the sequence
3245     set (OP[0] OP[2])
3246     set (OP[1] OP[3])
3247   is valid.  Naturally, output operand ordering is little-endian.
3248   This is used by *movtf_internal and *movti_internal.  */
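/* Illustrative example, not part of the original commentary: splitting a
   TImode copy from the register pair $1/$2 into the pair $2/$3 first
   produces OP[0..3] = { $2, $3, $1, $2 }; since OP[0] overlaps OP[3],
   FIXUP_OVERLAP swaps the two sets so that $3 is written from $2 before
   $2 is overwritten from $1.  */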
3249
3250void
3251alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3252			bool fixup_overlap)
3253{
3254  switch (GET_CODE (operands[1]))
3255    {
3256    case REG:
3257      operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3258      operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3259      break;
3260
3261    case MEM:
3262      operands[3] = adjust_address (operands[1], DImode, 8);
3263      operands[2] = adjust_address (operands[1], DImode, 0);
3264      break;
3265
3266    CASE_CONST_SCALAR_INT:
3267    case CONST_DOUBLE:
3268      gcc_assert (operands[1] == CONST0_RTX (mode));
3269      operands[2] = operands[3] = const0_rtx;
3270      break;
3271
3272    default:
3273      gcc_unreachable ();
3274    }
3275
3276  switch (GET_CODE (operands[0]))
3277    {
3278    case REG:
3279      operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3280      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3281      break;
3282
3283    case MEM:
3284      operands[1] = adjust_address (operands[0], DImode, 8);
3285      operands[0] = adjust_address (operands[0], DImode, 0);
3286      break;
3287
3288    default:
3289      gcc_unreachable ();
3290    }
3291
3292  if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3293    {
3294      std::swap (operands[0], operands[1]);
3295      std::swap (operands[2], operands[3]);
3296    }
3297}
3298
3299/* Implement negtf2 or abstf2.  Op0 is destination, op1 is source,
3300   op2 is a register containing the sign bit, operation is the
3301   logical operation to be performed.  */
3302
3303void
3304alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3305{
3306  rtx high_bit = operands[2];
3307  rtx scratch;
3308  int move;
3309
3310  alpha_split_tmode_pair (operands, TFmode, false);
3311
3312  /* Detect three flavors of operand overlap.  */
3313  move = 1;
3314  if (rtx_equal_p (operands[0], operands[2]))
3315    move = 0;
3316  else if (rtx_equal_p (operands[1], operands[2]))
3317    {
3318      if (rtx_equal_p (operands[0], high_bit))
3319	move = 2;
3320      else
3321	move = -1;
3322    }
3323
3324  if (move < 0)
3325    emit_move_insn (operands[0], operands[2]);
3326
3327  /* ??? If the destination overlaps both source tf and high_bit, then
3328     assume source tf is dead in its entirety and use the other half
3329     for a scratch register.  Otherwise "scratch" is just the proper
3330     destination register.  */
3331  scratch = operands[move < 2 ? 1 : 3];
3332
3333  emit_insn ((*operation) (scratch, high_bit, operands[3]));
3334
3335  if (move > 0)
3336    {
3337      emit_move_insn (operands[0], operands[2]);
3338      if (move > 1)
3339	emit_move_insn (operands[1], scratch);
3340    }
3341}
3342
3343/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3344   unaligned data:
3345
3346           unsigned:                       signed:
3347   word:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3348           ldq_u  r2,X+1(r11)              ldq_u  r2,X+1(r11)
3349           lda    r3,X(r11)                lda    r3,X+2(r11)
3350           extwl  r1,r3,r1                 extql  r1,r3,r1
3351           extwh  r2,r3,r2                 extqh  r2,r3,r2
3352           or     r1,r2,r1                 or     r1,r2,r1
3353                                           sra    r1,48,r1
3354
3355   long:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3356           ldq_u  r2,X+3(r11)              ldq_u  r2,X+3(r11)
3357           lda    r3,X(r11)                lda    r3,X(r11)
3358           extll  r1,r3,r1                 extll  r1,r3,r1
3359           extlh  r2,r3,r2                 extlh  r2,r3,r2
3360           or     r1,r2,r1                 addl   r1,r2,r1
3361
3362   quad:   ldq_u  r1,X(r11)
3363           ldq_u  r2,X+7(r11)
3364           lda    r3,X(r11)
3365           extql  r1,r3,r1
3366           extqh  r2,r3,r2
3367           or     r1,r2,r1
3368*/
3369
3370void
3371alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3372			     HOST_WIDE_INT ofs, int sign)
3373{
3374  rtx meml, memh, addr, extl, exth, tmp, mema;
3375  machine_mode mode;
3376
3377  if (TARGET_BWX && size == 2)
3378    {
3379      meml = adjust_address (mem, QImode, ofs);
3380      memh = adjust_address (mem, QImode, ofs+1);
3381      extl = gen_reg_rtx (DImode);
3382      exth = gen_reg_rtx (DImode);
3383      emit_insn (gen_zero_extendqidi2 (extl, meml));
3384      emit_insn (gen_zero_extendqidi2 (exth, memh));
3385      exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3386				  NULL, 1, OPTAB_LIB_WIDEN);
3387      addr = expand_simple_binop (DImode, IOR, extl, exth,
3388				  NULL, 1, OPTAB_LIB_WIDEN);
3389
3390      if (sign && GET_MODE (tgt) != HImode)
3391	{
3392	  addr = gen_lowpart (HImode, addr);
3393	  emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3394	}
3395      else
3396	{
3397	  if (GET_MODE (tgt) != DImode)
3398	    addr = gen_lowpart (GET_MODE (tgt), addr);
3399	  emit_move_insn (tgt, addr);
3400	}
3401      return;
3402    }
3403
3404  meml = gen_reg_rtx (DImode);
3405  memh = gen_reg_rtx (DImode);
3406  addr = gen_reg_rtx (DImode);
3407  extl = gen_reg_rtx (DImode);
3408  exth = gen_reg_rtx (DImode);
3409
3410  mema = XEXP (mem, 0);
3411  if (GET_CODE (mema) == LO_SUM)
3412    mema = force_reg (Pmode, mema);
3413
3414  /* AND addresses cannot be in any alias set, since they may implicitly
3415     alias surrounding code.  Ideally we'd have some alias set that
3416     covered all types except those with alignment 8 or higher.  */
3417
3418  tmp = change_address (mem, DImode,
3419			gen_rtx_AND (DImode,
3420				     plus_constant (DImode, mema, ofs),
3421				     GEN_INT (-8)));
3422  set_mem_alias_set (tmp, 0);
3423  emit_move_insn (meml, tmp);
3424
3425  tmp = change_address (mem, DImode,
3426			gen_rtx_AND (DImode,
3427				     plus_constant (DImode, mema,
3428						    ofs + size - 1),
3429				     GEN_INT (-8)));
3430  set_mem_alias_set (tmp, 0);
3431  emit_move_insn (memh, tmp);
3432
3433  if (sign && size == 2)
3434    {
3435      emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3436
3437      emit_insn (gen_extql (extl, meml, addr));
3438      emit_insn (gen_extqh (exth, memh, addr));
3439
3440      /* We must use tgt here for the target.  Alpha-vms port fails if we use
3441	 addr for the target, because addr is marked as a pointer and combine
3442	 knows that pointers are always sign-extended 32-bit values.  */
3443      addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3444      addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3445			   addr, 1, OPTAB_WIDEN);
3446    }
3447  else
3448    {
3449      emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3450      emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3451      switch ((int) size)
3452	{
3453	case 2:
3454	  emit_insn (gen_extwh (exth, memh, addr));
3455	  mode = HImode;
3456	  break;
3457	case 4:
3458	  emit_insn (gen_extlh (exth, memh, addr));
3459	  mode = SImode;
3460	  break;
3461	case 8:
3462	  emit_insn (gen_extqh (exth, memh, addr));
3463	  mode = DImode;
3464	  break;
3465	default:
3466	  gcc_unreachable ();
3467	}
3468
3469      addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3470			   gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3471			   sign, OPTAB_WIDEN);
3472    }
3473
3474  if (addr != tgt)
3475    emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3476}
3477
3478/* Similarly, use ins and msk instructions to perform unaligned stores.  */
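/* A rough sketch, for illustration only, of the kind of sequence this
   expands to for an unaligned word store of r4 at X(r11) -- register
   numbers are arbitrary and the mnemonics parallel the extraction table
   above:

           ldq_u  r1,X(r11)
           ldq_u  r2,X+1(r11)
           lda    r3,X(r11)
           inswl  r4,r3,r5
           inswh  r4,r3,r6
           mskwl  r1,r3,r1
           mskwh  r2,r3,r2
           or     r1,r5,r1
           or     r2,r6,r2
           stq_u  r2,X+1(r11)
           stq_u  r1,X(r11)  */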
3479
3480void
3481alpha_expand_unaligned_store (rtx dst, rtx src,
3482			      HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3483{
3484  rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3485
3486  if (TARGET_BWX && size == 2)
3487    {
3488      if (src != const0_rtx)
3489	{
3490	  dstl = gen_lowpart (QImode, src);
3491	  dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3492				      NULL, 1, OPTAB_LIB_WIDEN);
3493	  dsth = gen_lowpart (QImode, dsth);
3494	}
3495      else
3496	dstl = dsth = const0_rtx;
3497
3498      meml = adjust_address (dst, QImode, ofs);
3499      memh = adjust_address (dst, QImode, ofs+1);
3500
3501      emit_move_insn (meml, dstl);
3502      emit_move_insn (memh, dsth);
3503      return;
3504    }
3505
3506  dstl = gen_reg_rtx (DImode);
3507  dsth = gen_reg_rtx (DImode);
3508  insl = gen_reg_rtx (DImode);
3509  insh = gen_reg_rtx (DImode);
3510
3511  dsta = XEXP (dst, 0);
3512  if (GET_CODE (dsta) == LO_SUM)
3513    dsta = force_reg (Pmode, dsta);
3514
3515  /* AND addresses cannot be in any alias set, since they may implicitly
3516     alias surrounding code.  Ideally we'd have some alias set that
3517     covered all types except those with alignment 8 or higher.  */
3518
3519  meml = change_address (dst, DImode,
3520			 gen_rtx_AND (DImode,
3521				      plus_constant (DImode, dsta, ofs),
3522				      GEN_INT (-8)));
3523  set_mem_alias_set (meml, 0);
3524
3525  memh = change_address (dst, DImode,
3526			 gen_rtx_AND (DImode,
3527				      plus_constant (DImode, dsta,
3528						     ofs + size - 1),
3529				      GEN_INT (-8)));
3530  set_mem_alias_set (memh, 0);
3531
3532  emit_move_insn (dsth, memh);
3533  emit_move_insn (dstl, meml);
3534
3535  addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3536
3537  if (src != CONST0_RTX (GET_MODE (src)))
3538    {
3539      emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3540			    GEN_INT (size*8), addr));
3541
3542      switch ((int) size)
3543	{
3544	case 2:
3545	  emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3546	  break;
3547	case 4:
3548	  emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3549	  break;
3550	case 8:
3551	  emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3552	  break;
3553	default:
3554	  gcc_unreachable ();
3555	}
3556    }
3557
3558  emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3559
3560  switch ((int) size)
3561    {
3562    case 2:
3563      emit_insn (gen_mskwl (dstl, dstl, addr));
3564      break;
3565    case 4:
3566      emit_insn (gen_mskll (dstl, dstl, addr));
3567      break;
3568    case 8:
3569      emit_insn (gen_mskql (dstl, dstl, addr));
3570      break;
3571    default:
3572      gcc_unreachable ();
3573    }
3574
3575  if (src != CONST0_RTX (GET_MODE (src)))
3576    {
3577      dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3578      dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3579    }
3580
3581  /* Must store high before low for the degenerate case of an aligned address.  */
3582  emit_move_insn (memh, dsth);
3583  emit_move_insn (meml, dstl);
3584}
3585
3586/* The block move code tries to maximize speed by separating loads and
3587   stores at the expense of register pressure: we load all of the data
3588   before we store it back out.  There are two secondary effects worth
3589   mentioning, that this speeds copying to/from aligned and unaligned
3590   mentioning: it speeds copying to/from aligned and unaligned
3591   buffers, and it makes the code significantly easier to write.  */
3592#define MAX_MOVE_WORDS	8
3593
3594/* Load an integral number of consecutive unaligned quadwords.  */
3595
3596static void
3597alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3598				   HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3599{
3600  rtx const im8 = GEN_INT (-8);
3601  rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3602  rtx sreg, areg, tmp, smema;
3603  HOST_WIDE_INT i;
3604
3605  smema = XEXP (smem, 0);
3606  if (GET_CODE (smema) == LO_SUM)
3607    smema = force_reg (Pmode, smema);
3608
3609  /* Generate all the tmp registers we need.  */
3610  for (i = 0; i < words; ++i)
3611    {
3612      data_regs[i] = out_regs[i];
3613      ext_tmps[i] = gen_reg_rtx (DImode);
3614    }
3615  data_regs[words] = gen_reg_rtx (DImode);
3616
3617  if (ofs != 0)
3618    smem = adjust_address (smem, GET_MODE (smem), ofs);
3619
3620  /* Load up all of the source data.  */
3621  for (i = 0; i < words; ++i)
3622    {
3623      tmp = change_address (smem, DImode,
3624			    gen_rtx_AND (DImode,
3625					 plus_constant (DImode, smema, 8*i),
3626					 im8));
3627      set_mem_alias_set (tmp, 0);
3628      emit_move_insn (data_regs[i], tmp);
3629    }
3630
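  /* Load one additional quadword past the end; it overlaps the tail of the
     region and supplies the high-order fragment of the last output word.  */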
3631  tmp = change_address (smem, DImode,
3632			gen_rtx_AND (DImode,
3633				     plus_constant (DImode, smema,
3634						    8*words - 1),
3635				     im8));
3636  set_mem_alias_set (tmp, 0);
3637  emit_move_insn (data_regs[words], tmp);
3638
3639  /* Extract the half-word fragments.  Unfortunately DEC decided to make
3640     extxh with offset zero a noop instead of zeroing the register, so
3641     we must take care of that edge condition ourselves with cmov.  */
3642
3643  sreg = copy_addr_to_reg (smema);
3644  areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3645		       1, OPTAB_WIDEN);
3646  for (i = 0; i < words; ++i)
3647    {
3648      emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3649      emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3650      emit_insn (gen_rtx_SET (ext_tmps[i],
3651			      gen_rtx_IF_THEN_ELSE (DImode,
3652						    gen_rtx_EQ (DImode, areg,
3653								const0_rtx),
3654						    const0_rtx, ext_tmps[i])));
3655    }
3656
3657  /* Merge the half-words into whole words.  */
3658  for (i = 0; i < words; ++i)
3659    {
3660      out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3661				  ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3662    }
3663}
3664
3665/* Store an integral number of consecutive unaligned quadwords.  DATA_REGS
3666   may be NULL to store zeros.  */
3667
3668static void
3669alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3670				    HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3671{
3672  rtx const im8 = GEN_INT (-8);
3673  rtx ins_tmps[MAX_MOVE_WORDS];
3674  rtx st_tmp_1, st_tmp_2, dreg;
3675  rtx st_addr_1, st_addr_2, dmema;
3676  HOST_WIDE_INT i;
3677
3678  dmema = XEXP (dmem, 0);
3679  if (GET_CODE (dmema) == LO_SUM)
3680    dmema = force_reg (Pmode, dmema);
3681
3682  /* Generate all the tmp registers we need.  */
3683  if (data_regs != NULL)
3684    for (i = 0; i < words; ++i)
3685      ins_tmps[i] = gen_reg_rtx (DImode);
3686  st_tmp_1 = gen_reg_rtx (DImode);
3687  st_tmp_2 = gen_reg_rtx (DImode);
3688
3689  if (ofs != 0)
3690    dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3691
3692  st_addr_2 = change_address (dmem, DImode,
3693			      gen_rtx_AND (DImode,
3694					   plus_constant (DImode, dmema,
3695							  words*8 - 1),
3696					   im8));
3697  set_mem_alias_set (st_addr_2, 0);
3698
3699  st_addr_1 = change_address (dmem, DImode,
3700			      gen_rtx_AND (DImode, dmema, im8));
3701  set_mem_alias_set (st_addr_1, 0);
3702
3703  /* Load up the destination end bits.  */
3704  emit_move_insn (st_tmp_2, st_addr_2);
3705  emit_move_insn (st_tmp_1, st_addr_1);
3706
3707  /* Shift the input data into place.  */
3708  dreg = copy_addr_to_reg (dmema);
3709  if (data_regs != NULL)
3710    {
3711      for (i = words-1; i >= 0; --i)
3712	{
3713	  emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3714	  emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3715	}
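      /* OR together the pieces destined for the same memory quadword:
	 the insql fragment of word I and the insqh spill from word I-1.  */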
3716      for (i = words-1; i > 0; --i)
3717	{
3718	  ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3719					ins_tmps[i-1], ins_tmps[i-1], 1,
3720					OPTAB_WIDEN);
3721	}
3722    }
3723
3724  /* Split and merge the ends with the destination data.  */
3725  emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3726  emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3727
3728  if (data_regs != NULL)
3729    {
3730      st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3731			       st_tmp_2, 1, OPTAB_WIDEN);
3732      st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3733			       st_tmp_1, 1, OPTAB_WIDEN);
3734    }
3735
3736  /* Store it all.  */
3737  emit_move_insn (st_addr_2, st_tmp_2);
3738  for (i = words-1; i > 0; --i)
3739    {
3740      rtx tmp = change_address (dmem, DImode,
3741				gen_rtx_AND (DImode,
3742					     plus_constant (DImode,
3743							    dmema, i*8),
3744					     im8));
3745      set_mem_alias_set (tmp, 0);
3746      emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3747    }
3748  emit_move_insn (st_addr_1, st_tmp_1);
3749}
3750
3751
3752/* Expand string/block move operations.
3753
3754   operands[0] is the pointer to the destination.
3755   operands[1] is the pointer to the source.
3756   operands[2] is the number of bytes to move.
3757   operands[3] is the alignment.  */
3758
3759int
3760alpha_expand_block_move (rtx operands[])
3761{
3762  rtx bytes_rtx	= operands[2];
3763  rtx align_rtx = operands[3];
3764  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3765  HOST_WIDE_INT bytes = orig_bytes;
3766  HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3767  HOST_WIDE_INT dst_align = src_align;
3768  rtx orig_src = operands[1];
3769  rtx orig_dst = operands[0];
3770  rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3771  rtx tmp;
3772  unsigned int i, words, ofs, nregs = 0;
3773
3774  if (orig_bytes <= 0)
3775    return 1;
3776  else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3777    return 0;
3778
3779  /* Look for additional alignment information from recorded register info.  */
3780
3781  tmp = XEXP (orig_src, 0);
3782  if (REG_P (tmp))
3783    src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3784  else if (GET_CODE (tmp) == PLUS
3785	   && REG_P (XEXP (tmp, 0))
3786	   && CONST_INT_P (XEXP (tmp, 1)))
3787    {
3788      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3789      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3790
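      /* The source address is a register of known alignment A plus a
	 constant C; the sum is only as aligned as both allow.  */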
3791      if (a > src_align)
3792	{
3793          if (a >= 64 && c % 8 == 0)
3794	    src_align = 64;
3795          else if (a >= 32 && c % 4 == 0)
3796	    src_align = 32;
3797          else if (a >= 16 && c % 2 == 0)
3798	    src_align = 16;
3799	}
3800    }
3801
3802  tmp = XEXP (orig_dst, 0);
3803  if (REG_P (tmp))
3804    dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3805  else if (GET_CODE (tmp) == PLUS
3806	   && REG_P (XEXP (tmp, 0))
3807	   && CONST_INT_P (XEXP (tmp, 1)))
3808    {
3809      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3810      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3811
3812      if (a > dst_align)
3813	{
3814          if (a >= 64 && c % 8 == 0)
3815	    dst_align = 64;
3816          else if (a >= 32 && c % 4 == 0)
3817	    dst_align = 32;
3818          else if (a >= 16 && c % 2 == 0)
3819	    dst_align = 16;
3820	}
3821    }
3822
3823  ofs = 0;
3824  if (src_align >= 64 && bytes >= 8)
3825    {
3826      words = bytes / 8;
3827
3828      for (i = 0; i < words; ++i)
3829	data_regs[nregs + i] = gen_reg_rtx (DImode);
3830
3831      for (i = 0; i < words; ++i)
3832	emit_move_insn (data_regs[nregs + i],
3833			adjust_address (orig_src, DImode, ofs + i * 8));
3834
3835      nregs += words;
3836      bytes -= words * 8;
3837      ofs += words * 8;
3838    }
3839
3840  if (src_align >= 32 && bytes >= 4)
3841    {
3842      words = bytes / 4;
3843
3844      for (i = 0; i < words; ++i)
3845	data_regs[nregs + i] = gen_reg_rtx (SImode);
3846
3847      for (i = 0; i < words; ++i)
3848	emit_move_insn (data_regs[nregs + i],
3849			adjust_address (orig_src, SImode, ofs + i * 4));
3850
3851      nregs += words;
3852      bytes -= words * 4;
3853      ofs += words * 4;
3854    }
3855
3856  if (bytes >= 8)
3857    {
3858      words = bytes / 8;
3859
3860      for (i = 0; i < words+1; ++i)
3861	data_regs[nregs + i] = gen_reg_rtx (DImode);
3862
3863      alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3864					 words, ofs);
3865
3866      nregs += words;
3867      bytes -= words * 8;
3868      ofs += words * 8;
3869    }
3870
3871  if (! TARGET_BWX && bytes >= 4)
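  /* Without BWX, mop up a remaining longword with one unaligned extract
     sequence; with BWX the tail is instead handled below with direct
     halfword and byte loads.  */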
3872    {
3873      data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3874      alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3875      bytes -= 4;
3876      ofs += 4;
3877    }
3878
3879  if (bytes >= 2)
3880    {
3881      if (src_align >= 16)
3882	{
3883	  do {
3884	    data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3885	    emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3886	    bytes -= 2;
3887	    ofs += 2;
3888	  } while (bytes >= 2);
3889	}
3890      else if (! TARGET_BWX)
3891	{
3892	  data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3893	  alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3894	  bytes -= 2;
3895	  ofs += 2;
3896	}
3897    }
3898
3899  while (bytes > 0)
3900    {
3901      data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3902      emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3903      bytes -= 1;
3904      ofs += 1;
3905    }
3906
3907  gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3908
3909  /* Now save it back out again.  */
3910
3911  i = 0, ofs = 0;
3912
3913  /* Write out the data in whatever chunks reading the source allowed.  */
3914  if (dst_align >= 64)
3915    {
3916      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3917	{
3918	  emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3919			  data_regs[i]);
3920	  ofs += 8;
3921	  i++;
3922	}
3923    }
3924
3925  if (dst_align >= 32)
3926    {
3927      /* If the source has remaining DImode regs, write them out in
3928	 two pieces.  */
3929      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3930	{
3931	  tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3932			      NULL_RTX, 1, OPTAB_WIDEN);
3933
3934	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3935			  gen_lowpart (SImode, data_regs[i]));
3936	  emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3937			  gen_lowpart (SImode, tmp));
3938	  ofs += 8;
3939	  i++;
3940	}
3941
3942      while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3943	{
3944	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3945			  data_regs[i]);
3946	  ofs += 4;
3947	  i++;
3948	}
3949    }
3950
3951  if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3952    {
3953      /* Write out a remaining block of words using unaligned methods.  */
3954
3955      for (words = 1; i + words < nregs; words++)
3956	if (GET_MODE (data_regs[i + words]) != DImode)
3957	  break;
3958
3959      if (words == 1)
3960	alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3961      else
3962        alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3963					    words, ofs);
3964
3965      i += words;
3966      ofs += words * 8;
3967    }
3968
3969  /* Due to the above, this won't be aligned.  */
3970  /* ??? If we have more than one of these, consider constructing full
3971     words in registers and using alpha_expand_unaligned_store_words.  */
3972  while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3973    {
3974      alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3975      ofs += 4;
3976      i++;
3977    }
3978
3979  if (dst_align >= 16)
3980    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3981      {
3982	emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3983	i++;
3984	ofs += 2;
3985      }
3986  else
3987    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3988      {
3989	alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3990	i++;
3991	ofs += 2;
3992      }
3993
3994  /* The remainder must be byte copies.  */
3995  while (i < nregs)
3996    {
3997      gcc_assert (GET_MODE (data_regs[i]) == QImode);
3998      emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
3999      i++;
4000      ofs += 1;
4001    }
4002
4003  return 1;
4004}
4005
4006int
4007alpha_expand_block_clear (rtx operands[])
4008{
4009  rtx bytes_rtx	= operands[1];
4010  rtx align_rtx = operands[3];
4011  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4012  HOST_WIDE_INT bytes = orig_bytes;
4013  HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4014  HOST_WIDE_INT alignofs = 0;
4015  rtx orig_dst = operands[0];
4016  rtx tmp;
4017  int i, words, ofs = 0;
4018
4019  if (orig_bytes <= 0)
4020    return 1;
4021  if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4022    return 0;
4023
4024  /* Look for stricter alignment.  */
4025  tmp = XEXP (orig_dst, 0);
4026  if (REG_P (tmp))
4027    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4028  else if (GET_CODE (tmp) == PLUS
4029	   && REG_P (XEXP (tmp, 0))
4030	   && CONST_INT_P (XEXP (tmp, 1)))
4031    {
4032      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4033      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4034
4035      if (a > align)
4036	{
4037          if (a >= 64)
4038	    align = a, alignofs = 8 - c % 8;
4039          else if (a >= 32)
4040	    align = a, alignofs = 4 - c % 4;
4041          else if (a >= 16)
4042	    align = a, alignofs = 2 - c % 2;
4043	}
4044    }
4045
4046  /* Handle an unaligned prefix first.  */
4047
4048  if (alignofs > 0)
4049    {
4050      /* Given that alignofs is bounded by align, the only time BWX could
4051	 generate three stores is for a 7 byte fill.  Prefer two individual
4052	 stores over a load/mask/store sequence.  */
4053      if ((!TARGET_BWX || alignofs == 7)
4054	       && align >= 32
4055	       && !(alignofs == 4 && bytes >= 4))
4056	{
4057	  machine_mode mode = (align >= 64 ? DImode : SImode);
4058	  int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4059	  rtx mem, tmp;
4060	  HOST_WIDE_INT mask;
4061
4062	  mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4063	  set_mem_alias_set (mem, 0);
4064
4065	  mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
4066	  if (bytes < alignofs)
4067	    {
4068	      mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4069	      ofs += bytes;
4070	      bytes = 0;
4071	    }
4072	  else
4073	    {
4074	      bytes -= alignofs;
4075	      ofs += alignofs;
4076	    }
4077	  alignofs = 0;
4078
4079	  tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4080			      NULL_RTX, 1, OPTAB_WIDEN);
4081
4082	  emit_move_insn (mem, tmp);
4083	}
4084
4085      if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4086	{
4087	  emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4088	  bytes -= 1;
4089	  ofs += 1;
4090	  alignofs -= 1;
4091	}
4092      if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4093	{
4094	  emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4095	  bytes -= 2;
4096	  ofs += 2;
4097	  alignofs -= 2;
4098	}
4099      if (alignofs == 4 && bytes >= 4)
4100	{
4101	  emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4102	  bytes -= 4;
4103	  ofs += 4;
4104	  alignofs = 0;
4105	}
4106
4107      /* If we've not used the extra lead alignment information by now,
4108	 we won't be able to.  Downgrade align to match what's left over.  */
4109      if (alignofs > 0)
4110	{
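	  /* Keep only the lowest set bit; e.g. a leftover alignofs of 6
	     means only 2-byte alignment can still be assumed.  */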
4111	  alignofs = alignofs & -alignofs;
4112	  align = MIN (align, alignofs * BITS_PER_UNIT);
4113	}
4114    }
4115
4116  /* Handle a block of contiguous long-words.  */
4117
4118  if (align >= 64 && bytes >= 8)
4119    {
4120      words = bytes / 8;
4121
4122      for (i = 0; i < words; ++i)
4123	emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4124			const0_rtx);
4125
4126      bytes -= words * 8;
4127      ofs += words * 8;
4128    }
4129
4130  /* If the block is large and appropriately aligned, emit a single
4131     store followed by a sequence of stq_u insns.  */
4132
4133  if (align >= 32 && bytes > 16)
4134    {
4135      rtx orig_dsta;
4136
4137      emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4138      bytes -= 4;
4139      ofs += 4;
4140
4141      orig_dsta = XEXP (orig_dst, 0);
4142      if (GET_CODE (orig_dsta) == LO_SUM)
4143	orig_dsta = force_reg (Pmode, orig_dsta);
4144
4145      words = bytes / 8;
4146      for (i = 0; i < words; ++i)
4147	{
4148	  rtx mem
4149	    = change_address (orig_dst, DImode,
4150			      gen_rtx_AND (DImode,
4151					   plus_constant (DImode, orig_dsta,
4152							  ofs + i*8),
4153					   GEN_INT (-8)));
4154	  set_mem_alias_set (mem, 0);
4155	  emit_move_insn (mem, const0_rtx);
4156	}
4157
4158      /* Depending on the alignment, the first stq_u may have overlapped
4159	 with the initial stl, which means that the last stq_u didn't
4160	 write as much as it would appear.  Leave those questionable bytes
4161	 unaccounted for.  */
4162      bytes -= words * 8 - 4;
4163      ofs += words * 8 - 4;
4164    }
4165
4166  /* Handle a smaller block of aligned words.  */
4167
4168  if ((align >= 64 && bytes == 4)
4169      || (align == 32 && bytes >= 4))
4170    {
4171      words = bytes / 4;
4172
4173      for (i = 0; i < words; ++i)
4174	emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4175			const0_rtx);
4176
4177      bytes -= words * 4;
4178      ofs += words * 4;
4179    }
4180
4181  /* An unaligned block uses stq_u stores for as many as possible.  */
4182
4183  if (bytes >= 8)
4184    {
4185      words = bytes / 8;
4186
4187      alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4188
4189      bytes -= words * 8;
4190      ofs += words * 8;
4191    }
4192
4193  /* Next clean up any trailing pieces.  */
4194
4195  /* Count the number of bits in BYTES for which aligned stores could
4196     be emitted.  */
4197  words = 0;
4198  for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align; i <<= 1)
4199    if (bytes & i)
4200      words += 1;
4201
4202  /* If we have appropriate alignment (and it wouldn't take too many
4203     instructions otherwise), mask out the bytes we need.  */
4204  if (TARGET_BWX ? words > 2 : bytes > 0)
4205    {
4206      if (align >= 64)
4207	{
4208	  rtx mem, tmp;
4209	  HOST_WIDE_INT mask;
4210
4211	  mem = adjust_address (orig_dst, DImode, ofs);
4212	  set_mem_alias_set (mem, 0);
4213
4214	  mask = HOST_WIDE_INT_M1U << (bytes * 8);
4215
4216	  tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4217			      NULL_RTX, 1, OPTAB_WIDEN);
4218
4219	  emit_move_insn (mem, tmp);
4220	  return 1;
4221	}
4222      else if (align >= 32 && bytes < 4)
4223	{
4224	  rtx mem, tmp;
4225	  HOST_WIDE_INT mask;
4226
4227	  mem = adjust_address (orig_dst, SImode, ofs);
4228	  set_mem_alias_set (mem, 0);
4229
4230	  mask = HOST_WIDE_INT_M1U << (bytes * 8);
4231
4232	  tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4233			      NULL_RTX, 1, OPTAB_WIDEN);
4234
4235	  emit_move_insn (mem, tmp);
4236	  return 1;
4237	}
4238    }
4239
4240  if (!TARGET_BWX && bytes >= 4)
4241    {
4242      alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4243      bytes -= 4;
4244      ofs += 4;
4245    }
4246
4247  if (bytes >= 2)
4248    {
4249      if (align >= 16)
4250	{
4251	  do {
4252	    emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4253			    const0_rtx);
4254	    bytes -= 2;
4255	    ofs += 2;
4256	  } while (bytes >= 2);
4257	}
4258      else if (! TARGET_BWX)
4259	{
4260	  alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4261	  bytes -= 2;
4262	  ofs += 2;
4263	}
4264    }
4265
4266  while (bytes > 0)
4267    {
4268      emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4269      bytes -= 1;
4270      ofs += 1;
4271    }
4272
4273  return 1;
4274}
4275
4276/* Returns a mask so that zap(x, value) == x & mask.  */
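/* For example, zap (x, 0x01) clears byte 0, so the mask returned for
   value 0x01 is 0xffffffffffffff00.  */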
4277
4278rtx
4279alpha_expand_zap_mask (HOST_WIDE_INT value)
4280{
4281  rtx result;
4282  int i;
4283  HOST_WIDE_INT mask = 0;
4284
4285  for (i = 7; i >= 0; --i)
4286    {
4287      mask <<= 8;
4288      if (!((value >> i) & 1))
4289	mask |= 0xff;
4290    }
4291
4292  result = gen_int_mode (mask, DImode);
4293  return result;
4294}
4295
4296void
4297alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4298				   machine_mode mode,
4299				   rtx op0, rtx op1, rtx op2)
4300{
4301  op0 = gen_lowpart (mode, op0);
4302
4303  if (op1 == const0_rtx)
4304    op1 = CONST0_RTX (mode);
4305  else
4306    op1 = gen_lowpart (mode, op1);
4307
4308  if (op2 == const0_rtx)
4309    op2 = CONST0_RTX (mode);
4310  else
4311    op2 = gen_lowpart (mode, op2);
4312
4313  emit_insn ((*gen) (op0, op1, op2));
4314}
4315
4316/* A subroutine of the atomic operation splitters.  Jump to LABEL if
4317   COND is true.  Mark the jump as unlikely to be taken.  */
4318
4319static void
4320emit_unlikely_jump (rtx cond, rtx label)
4321{
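  /* Annotate the jump with a taken probability of just under 1%.  */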
4322  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4323  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4324  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4325  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
4326}
4327
4328/* A subroutine of the atomic operation splitters.  Emit a load-locked
4329   instruction in MODE.  */
4330
4331static void
4332emit_load_locked (machine_mode mode, rtx reg, rtx mem)
4333{
4334  rtx (*fn) (rtx, rtx) = NULL;
4335  if (mode == SImode)
4336    fn = gen_load_locked_si;
4337  else if (mode == DImode)
4338    fn = gen_load_locked_di;
4339  emit_insn (fn (reg, mem));
4340}
4341
4342/* A subroutine of the atomic operation splitters.  Emit a store-conditional
4343   instruction in MODE.  */
4344
4345static void
4346emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
4347{
4348  rtx (*fn) (rtx, rtx, rtx) = NULL;
4349  if (mode == SImode)
4350    fn = gen_store_conditional_si;
4351  else if (mode == DImode)
4352    fn = gen_store_conditional_di;
4353  emit_insn (fn (res, mem, val));
4354}
4355
4356/* Subroutines of the atomic operation splitters.  Emit barriers
4357   as needed for the memory MODEL.  */
4358
4359static void
4360alpha_pre_atomic_barrier (enum memmodel model)
4361{
4362  if (need_atomic_barrier_p (model, true))
4363    emit_insn (gen_memory_barrier ());
4364}
4365
4366static void
4367alpha_post_atomic_barrier (enum memmodel model)
4368{
4369  if (need_atomic_barrier_p (model, false))
4370    emit_insn (gen_memory_barrier ());
4371}
4372
4373/* A subroutine of the atomic operation splitters.  Emit an insxl
4374   instruction in MODE.  */
4375
4376static rtx
4377emit_insxl (machine_mode mode, rtx op1, rtx op2)
4378{
4379  rtx ret = gen_reg_rtx (DImode);
4380  rtx (*fn) (rtx, rtx, rtx);
4381
4382  switch (mode)
4383    {
4384    case QImode:
4385      fn = gen_insbl;
4386      break;
4387    case HImode:
4388      fn = gen_inswl;
4389      break;
4390    case SImode:
4391      fn = gen_insll;
4392      break;
4393    case DImode:
4394      fn = gen_insql;
4395      break;
4396    default:
4397      gcc_unreachable ();
4398    }
4399
4400  op1 = force_reg (mode, op1);
4401  emit_insn (fn (ret, op1, op2));
4402
4403  return ret;
4404}
4405
4406/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
4407   to perform.  MEM is the memory on which to operate.  VAL is the second
4408   operand of the binary operator.  BEFORE and AFTER are optional locations to
4409   return the value of MEM either before or after the operation.  SCRATCH is
4410   a scratch register.  */
4411
4412void
4413alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4414		       rtx after, rtx scratch, enum memmodel model)
4415{
4416  machine_mode mode = GET_MODE (mem);
4417  rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4418
4419  alpha_pre_atomic_barrier (model);
4420
4421  label = gen_label_rtx ();
4422  emit_label (label);
4423  label = gen_rtx_LABEL_REF (DImode, label);
4424
4425  if (before == NULL)
4426    before = scratch;
4427  emit_load_locked (mode, before, mem);
4428
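  /* The NOT code requests an atomic NAND: form BEFORE & VAL, then invert
     the result.  */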
4429  if (code == NOT)
4430    {
4431      x = gen_rtx_AND (mode, before, val);
4432      emit_insn (gen_rtx_SET (val, x));
4433
4434      x = gen_rtx_NOT (mode, val);
4435    }
4436  else
4437    x = gen_rtx_fmt_ee (code, mode, before, val);
4438  if (after)
4439    emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4440  emit_insn (gen_rtx_SET (scratch, x));
4441
4442  emit_store_conditional (mode, cond, mem, scratch);
4443
4444  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4445  emit_unlikely_jump (x, label);
4446
4447  alpha_post_atomic_barrier (model);
4448}
4449
4450/* Expand a compare and swap operation.  */
4451
4452void
4453alpha_split_compare_and_swap (rtx operands[])
4454{
4455  rtx cond, retval, mem, oldval, newval;
4456  bool is_weak;
4457  enum memmodel mod_s, mod_f;
4458  machine_mode mode;
4459  rtx label1, label2, x;
4460
4461  cond = operands[0];
4462  retval = operands[1];
4463  mem = operands[2];
4464  oldval = operands[3];
4465  newval = operands[4];
4466  is_weak = (operands[5] != const0_rtx);
4467  mod_s = memmodel_from_int (INTVAL (operands[6]));
4468  mod_f = memmodel_from_int (INTVAL (operands[7]));
4469  mode = GET_MODE (mem);
4470
4471  alpha_pre_atomic_barrier (mod_s);
4472
4473  label1 = NULL_RTX;
4474  if (!is_weak)
4475    {
4476      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4477      emit_label (XEXP (label1, 0));
4478    }
4479  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4480
4481  emit_load_locked (mode, retval, mem);
4482
4483  x = gen_lowpart (DImode, retval);
4484  if (oldval == const0_rtx)
4485    {
4486      emit_move_insn (cond, const0_rtx);
4487      x = gen_rtx_NE (DImode, x, const0_rtx);
4488    }
4489  else
4490    {
4491      x = gen_rtx_EQ (DImode, x, oldval);
4492      emit_insn (gen_rtx_SET (cond, x));
4493      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4494    }
4495  emit_unlikely_jump (x, label2);
4496
4497  emit_move_insn (cond, newval);
4498  emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4499
4500  if (!is_weak)
4501    {
4502      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4503      emit_unlikely_jump (x, label1);
4504    }
4505
4506  if (!is_mm_relaxed (mod_f))
4507    emit_label (XEXP (label2, 0));
4508
4509  alpha_post_atomic_barrier (mod_s);
4510
4511  if (is_mm_relaxed (mod_f))
4512    emit_label (XEXP (label2, 0));
4513}
4514
4515void
4516alpha_expand_compare_and_swap_12 (rtx operands[])
4517{
4518  rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4519  machine_mode mode;
4520  rtx addr, align, wdst;
4521  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4522
4523  cond = operands[0];
4524  dst = operands[1];
4525  mem = operands[2];
4526  oldval = operands[3];
4527  newval = operands[4];
4528  is_weak = operands[5];
4529  mod_s = operands[6];
4530  mod_f = operands[7];
4531  mode = GET_MODE (mem);
4532
4533  /* We forced the address into a register via mem_noofs_operand.  */
4534  addr = XEXP (mem, 0);
4535  gcc_assert (register_operand (addr, DImode));
4536
4537  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4538			       NULL_RTX, 1, OPTAB_DIRECT);
4539
4540  oldval = convert_modes (DImode, mode, oldval, 1);
4541
4542  if (newval != const0_rtx)
4543    newval = emit_insxl (mode, newval, addr);
4544
4545  wdst = gen_reg_rtx (DImode);
4546  if (mode == QImode)
4547    gen = gen_atomic_compare_and_swapqi_1;
4548  else
4549    gen = gen_atomic_compare_and_swaphi_1;
4550  emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4551		  is_weak, mod_s, mod_f));
4552
4553  emit_move_insn (dst, gen_lowpart (mode, wdst));
4554}
4555
4556void
4557alpha_split_compare_and_swap_12 (rtx operands[])
4558{
4559  rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4560  machine_mode mode;
4561  bool is_weak;
4562  enum memmodel mod_s, mod_f;
4563  rtx label1, label2, mem, addr, width, mask, x;
4564
4565  cond = operands[0];
4566  dest = operands[1];
4567  orig_mem = operands[2];
4568  oldval = operands[3];
4569  newval = operands[4];
4570  align = operands[5];
4571  is_weak = (operands[6] != const0_rtx);
4572  mod_s = memmodel_from_int (INTVAL (operands[7]));
4573  mod_f = memmodel_from_int (INTVAL (operands[8]));
4574  scratch = operands[9];
4575  mode = GET_MODE (orig_mem);
4576  addr = XEXP (orig_mem, 0);
4577
4578  mem = gen_rtx_MEM (DImode, align);
4579  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4580  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4581    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4582
4583  alpha_pre_atomic_barrier (mod_s);
4584
4585  label1 = NULL_RTX;
4586  if (!is_weak)
4587    {
4588      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4589      emit_label (XEXP (label1, 0));
4590    }
4591  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4592
4593  emit_load_locked (DImode, scratch, mem);
4594
4595  width = GEN_INT (GET_MODE_BITSIZE (mode));
4596  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4597  emit_insn (gen_extxl (dest, scratch, width, addr));
4598
4599  if (oldval == const0_rtx)
4600    {
4601      emit_move_insn (cond, const0_rtx);
4602      x = gen_rtx_NE (DImode, dest, const0_rtx);
4603    }
4604  else
4605    {
4606      x = gen_rtx_EQ (DImode, dest, oldval);
4607      emit_insn (gen_rtx_SET (cond, x));
4608      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4609    }
4610  emit_unlikely_jump (x, label2);
4611
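  /* Clear the field within the loaded quadword, then merge in the new
     value before attempting the store-conditional.  */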
4612  emit_insn (gen_mskxl (cond, scratch, mask, addr));
4613
4614  if (newval != const0_rtx)
4615    emit_insn (gen_iordi3 (cond, cond, newval));
4616
4617  emit_store_conditional (DImode, cond, mem, cond);
4618
4619  if (!is_weak)
4620    {
4621      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4622      emit_unlikely_jump (x, label1);
4623    }
4624
4625  if (!is_mm_relaxed (mod_f))
4626    emit_label (XEXP (label2, 0));
4627
4628  alpha_post_atomic_barrier (mod_s);
4629
4630  if (is_mm_relaxed (mod_f))
4631    emit_label (XEXP (label2, 0));
4632}
4633
4634/* Expand an atomic exchange operation.  */
4635
4636void
4637alpha_split_atomic_exchange (rtx operands[])
4638{
4639  rtx retval, mem, val, scratch;
4640  enum memmodel model;
4641  machine_mode mode;
4642  rtx label, x, cond;
4643
4644  retval = operands[0];
4645  mem = operands[1];
4646  val = operands[2];
4647  model = (enum memmodel) INTVAL (operands[3]);
4648  scratch = operands[4];
4649  mode = GET_MODE (mem);
4650  cond = gen_lowpart (DImode, scratch);
4651
4652  alpha_pre_atomic_barrier (model);
4653
4654  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4655  emit_label (XEXP (label, 0));
4656
4657  emit_load_locked (mode, retval, mem);
4658  emit_move_insn (scratch, val);
4659  emit_store_conditional (mode, cond, mem, scratch);
4660
4661  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4662  emit_unlikely_jump (x, label);
4663
4664  alpha_post_atomic_barrier (model);
4665}
4666
4667void
4668alpha_expand_atomic_exchange_12 (rtx operands[])
4669{
4670  rtx dst, mem, val, model;
4671  machine_mode mode;
4672  rtx addr, align, wdst;
4673  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4674
4675  dst = operands[0];
4676  mem = operands[1];
4677  val = operands[2];
4678  model = operands[3];
4679  mode = GET_MODE (mem);
4680
4681  /* We forced the address into a register via mem_noofs_operand.  */
4682  addr = XEXP (mem, 0);
4683  gcc_assert (register_operand (addr, DImode));
4684
4685  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4686			       NULL_RTX, 1, OPTAB_DIRECT);
4687
4688  /* Insert val into the correct byte location within the word.  */
4689  if (val != const0_rtx)
4690    val = emit_insxl (mode, val, addr);
4691
4692  wdst = gen_reg_rtx (DImode);
4693  if (mode == QImode)
4694    gen = gen_atomic_exchangeqi_1;
4695  else
4696    gen = gen_atomic_exchangehi_1;
4697  emit_insn (gen (wdst, mem, val, align, model));
4698
4699  emit_move_insn (dst, gen_lowpart (mode, wdst));
4700}
4701
4702void
4703alpha_split_atomic_exchange_12 (rtx operands[])
4704{
4705  rtx dest, orig_mem, addr, val, align, scratch;
4706  rtx label, mem, width, mask, x;
4707  machine_mode mode;
4708  enum memmodel model;
4709
4710  dest = operands[0];
4711  orig_mem = operands[1];
4712  val = operands[2];
4713  align = operands[3];
4714  model = (enum memmodel) INTVAL (operands[4]);
4715  scratch = operands[5];
4716  mode = GET_MODE (orig_mem);
4717  addr = XEXP (orig_mem, 0);
4718
4719  mem = gen_rtx_MEM (DImode, align);
4720  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4721  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4722    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4723
4724  alpha_pre_atomic_barrier (model);
4725
4726  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4727  emit_label (XEXP (label, 0));
4728
4729  emit_load_locked (DImode, scratch, mem);
4730
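  /* Extract the old value for the result, then replace that field of the
     aligned quadword with the new value.  */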
4731  width = GEN_INT (GET_MODE_BITSIZE (mode));
4732  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4733  emit_insn (gen_extxl (dest, scratch, width, addr));
4734  emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4735  if (val != const0_rtx)
4736    emit_insn (gen_iordi3 (scratch, scratch, val));
4737
4738  emit_store_conditional (DImode, scratch, mem, scratch);
4739
4740  x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4741  emit_unlikely_jump (x, label);
4742
4743  alpha_post_atomic_barrier (model);
4744}
4745
4746/* Adjust the cost of a scheduling dependency.  Return the new cost of
4747   a dependency of type DEP_TYPE of INSN on DEP_INSN.  COST is the current cost.  */
4748
4749static int
4750alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4751		   unsigned int)
4752{
4753  enum attr_type dep_insn_type;
4754
4755  /* If the dependence is an anti-dependence, there is no cost.  For an
4756     output dependence, there is sometimes a cost, but it doesn't seem
4757     worth handling those few cases.  */
4758  if (dep_type != 0)
4759    return cost;
4760
4761  /* If we can't recognize the insns, we can't really do anything.  */
4762  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4763    return cost;
4764
4765  dep_insn_type = get_attr_type (dep_insn);
4766
4767  /* Bring in the user-defined memory latency.  */
4768  if (dep_insn_type == TYPE_ILD
4769      || dep_insn_type == TYPE_FLD
4770      || dep_insn_type == TYPE_LDSYM)
4771    cost += alpha_memory_latency-1;
4772
4773  /* Everything else handled in DFA bypasses now.  */
4774
4775  return cost;
4776}
4777
4778/* The number of instructions that can be issued per cycle.  */
4779
4780static int
4781alpha_issue_rate (void)
4782{
4783  return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4784}
4785
4786/* How many alternative schedules to try.  This should be as wide as the
4787   scheduling freedom in the DFA, but no wider.  Making this value too
4788   large results in extra work for the scheduler.
4789
4790   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4791   alternative schedules.  For EV5, we can choose between E0/E1 and
4792   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */
4793
4794static int
4795alpha_multipass_dfa_lookahead (void)
4796{
4797  return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4798}
4799
4800/* Machine-specific function data.  */
4801
4802struct GTY(()) alpha_links;
4803
4804struct GTY(()) machine_function
4805{
4806  /* For flag_reorder_blocks_and_partition.  */
4807  rtx gp_save_rtx;
4808
4809  /* For VMS condition handlers.  */
4810  bool uses_condition_handler;
4811
4812  /* Linkage entries.  */
4813  hash_map<nofree_string_hash, alpha_links *> *links;
4814};
4815
4816/* How to allocate a 'struct machine_function'.  */
4817
4818static struct machine_function *
4819alpha_init_machine_status (void)
4820{
4821  return ggc_cleared_alloc<machine_function> ();
4822}
4823
4824/* Support for frame based VMS condition handlers.  */
4825
4826/* A VMS condition handler may be established for a function with a call to
4827   __builtin_establish_vms_condition_handler, and cancelled with a call to
4828   __builtin_revert_vms_condition_handler.
4829
4830   The VMS Condition Handling Facility knows about the existence of a handler
4831   from the procedure descriptor .handler field.  As the VMS native compilers do,
4832   we store the user-specified handler's address at a fixed location in the
4833   stack frame and point the procedure descriptor at a common wrapper which
4834   fetches the real handler's address and issues an indirect call.
4835
4836   The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4837
4838   We force the procedure kind to PT_STACK, and the fixed frame location is
4839   fp+8, just before the register save area. We use the handler_data field in
4840   the procedure descriptor to state the fp offset at which the installed
4841   handler address can be found.  */
4842
4843#define VMS_COND_HANDLER_FP_OFFSET 8
4844
4845/* Expand code to store the currently installed user VMS condition handler
4846   into TARGET and install HANDLER as the new condition handler.  */
4847
4848void
4849alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4850{
4851  rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4852					    VMS_COND_HANDLER_FP_OFFSET);
4853
4854  rtx handler_slot
4855    = gen_rtx_MEM (DImode, handler_slot_address);
4856
4857  emit_move_insn (target, handler_slot);
4858  emit_move_insn (handler_slot, handler);
4859
4860  /* Notify the start/prologue/epilogue emitters that the condition handler
4861     slot is needed.  In addition to reserving the slot space, this will force
4862     the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4863     use above is correct.  */
4864  cfun->machine->uses_condition_handler = true;
4865}
4866
4867/* Expand code to store the current VMS condition handler into TARGET and
4868   nullify it.  */
4869
4870void
4871alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4872{
4873  /* We implement this by establishing a null condition handler, with the tiny
4874     side effect of setting uses_condition_handler.  This is a little bit
4875     pessimistic if no actual builtin_establish call is ever issued, which is
4876     not a real problem and is expected never to happen anyway.  */
4877
4878  alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4879}
4880
4881/* Functions to save and restore alpha_return_addr_rtx.  */
4882
4883/* Start the ball rolling with RETURN_ADDR_RTX.  */
4884
4885rtx
4886alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4887{
4888  if (count != 0)
4889    return const0_rtx;
4890
4891  return get_hard_reg_initial_val (Pmode, REG_RA);
4892}
4893
4894/* Return or create a memory slot containing the gp value for the current
4895   function.  Needed only if TARGET_LD_BUGGY_LDGP.  */
4896
4897rtx
4898alpha_gp_save_rtx (void)
4899{
4900  rtx_insn *seq;
4901  rtx m = cfun->machine->gp_save_rtx;
4902
4903  if (m == NULL)
4904    {
4905      start_sequence ();
4906
4907      m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4908      m = validize_mem (m);
4909      emit_move_insn (m, pic_offset_table_rtx);
4910
4911      seq = get_insns ();
4912      end_sequence ();
4913
4914      /* We used to simply emit the sequence after entry_of_function.
4915	 However, this breaks the CFG if the first instruction in the
4916	 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4917	 label.  Emit the sequence properly on the edge.  We are only
4918	 invoked from dw2_build_landing_pads and finish_eh_generation
4919	 will call commit_edge_insertions thanks to a kludge.  */
4920      insert_insn_on_edge (seq,
4921			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4922
4923      cfun->machine->gp_save_rtx = m;
4924    }
4925
4926  return m;
4927}
4928
4929static void
4930alpha_instantiate_decls (void)
4931{
4932  if (cfun->machine->gp_save_rtx != NULL_RTX)
4933    instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4934}
4935
4936static int
4937alpha_ra_ever_killed (void)
4938{
4939  rtx_insn *top;
4940
4941  if (!has_hard_reg_initial_val (Pmode, REG_RA))
4942    return (int)df_regs_ever_live_p (REG_RA);
4943
4944  push_topmost_sequence ();
4945  top = get_insns ();
4946  pop_topmost_sequence ();
4947
4948  return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
4949}
4950
4951
4952/* Return the trap mode suffix applicable to the current
4953   instruction, or NULL.  */
4954
4955static const char *
4956get_trap_mode_suffix (void)
4957{
4958  enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4959
4960  switch (s)
4961    {
4962    case TRAP_SUFFIX_NONE:
4963      return NULL;
4964
4965    case TRAP_SUFFIX_SU:
4966      if (alpha_fptm >= ALPHA_FPTM_SU)
4967	return "su";
4968      return NULL;
4969
4970    case TRAP_SUFFIX_SUI:
4971      if (alpha_fptm >= ALPHA_FPTM_SUI)
4972	return "sui";
4973      return NULL;
4974
4975    case TRAP_SUFFIX_V_SV:
4976      switch (alpha_fptm)
4977	{
4978	case ALPHA_FPTM_N:
4979	  return NULL;
4980	case ALPHA_FPTM_U:
4981	  return "v";
4982	case ALPHA_FPTM_SU:
4983	case ALPHA_FPTM_SUI:
4984	  return "sv";
4985	default:
4986	  gcc_unreachable ();
4987	}
4988
4989    case TRAP_SUFFIX_V_SV_SVI:
4990      switch (alpha_fptm)
4991	{
4992	case ALPHA_FPTM_N:
4993	  return NULL;
4994	case ALPHA_FPTM_U:
4995	  return "v";
4996	case ALPHA_FPTM_SU:
4997	  return "sv";
4998	case ALPHA_FPTM_SUI:
4999	  return "svi";
5000	default:
5001	  gcc_unreachable ();
5002	}
5003      break;
5004
5005    case TRAP_SUFFIX_U_SU_SUI:
5006      switch (alpha_fptm)
5007	{
5008	case ALPHA_FPTM_N:
5009	  return NULL;
5010	case ALPHA_FPTM_U:
5011	  return "u";
5012	case ALPHA_FPTM_SU:
5013	  return "su";
5014	case ALPHA_FPTM_SUI:
5015	  return "sui";
5016	default:
5017	  gcc_unreachable ();
5018	}
5019      break;
5020
5021    default:
5022      gcc_unreachable ();
5023    }
5024  gcc_unreachable ();
5025}
5026
5027/* Return the rounding mode suffix applicable to the current
5028   instruction, or NULL.  */
5029
5030static const char *
5031get_round_mode_suffix (void)
5032{
5033  enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5034
5035  switch (s)
5036    {
5037    case ROUND_SUFFIX_NONE:
5038      return NULL;
5039    case ROUND_SUFFIX_NORMAL:
5040      switch (alpha_fprm)
5041	{
5042	case ALPHA_FPRM_NORM:
5043	  return NULL;
5044	case ALPHA_FPRM_MINF:
5045	  return "m";
5046	case ALPHA_FPRM_CHOP:
5047	  return "c";
5048	case ALPHA_FPRM_DYN:
5049	  return "d";
5050	default:
5051	  gcc_unreachable ();
5052	}
5053      break;
5054
5055    case ROUND_SUFFIX_C:
5056      return "c";
5057
5058    default:
5059      gcc_unreachable ();
5060    }
5061  gcc_unreachable ();
5062}
5063
5064/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5065
5066static bool
5067alpha_print_operand_punct_valid_p (unsigned char code)
5068{
5069  return (code == '/' || code == ',' || code == '-' || code == '~'
5070	  || code == '#' || code == '*' || code == '&');
5071}
5072
5073/* Implement TARGET_PRINT_OPERAND.  The alpha-specific
5074   operand codes are documented below.  */
5075
5076static void
5077alpha_print_operand (FILE *file, rtx x, int code)
5078{
5079  int i;
5080
5081  switch (code)
5082    {
5083    case '~':
5084      /* Print the assembler name of the current function.  */
5085      assemble_name (file, alpha_fnname);
5086      break;
5087
5088    case '&':
5089      if (const char *name = get_some_local_dynamic_name ())
5090	assemble_name (file, name);
5091      else
5092	output_operand_lossage ("'%%&' used without any "
5093				"local dynamic TLS references");
5094      break;
5095
5096    case '/':
5097      /* Generates the instruction suffix.  The TRAP_SUFFIX and ROUND_SUFFIX
5098	 attributes are examined to determine what is appropriate.  */
5099      {
5100	const char *trap = get_trap_mode_suffix ();
5101	const char *round = get_round_mode_suffix ();
5102
5103	if (trap || round)
5104	  fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5105	break;
5106      }
5107
5108    case ',':
5109      /* Generates single precision suffix for floating point
5110	 instructions (s for IEEE, f for VAX).  */
5111      fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5112      break;
5113
5114    case '-':
5115      /* Generates double precision suffix for floating point
5116	 instructions (t for IEEE, g for VAX).  */
5117      fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5118      break;
5119
5120    case '#':
5121      if (alpha_this_literal_sequence_number == 0)
5122	alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5123      fprintf (file, "%d", alpha_this_literal_sequence_number);
5124      break;
5125
5126    case '*':
5127      if (alpha_this_gpdisp_sequence_number == 0)
5128	alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5129      fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5130      break;
5131
5132    case 'J':
5133      {
5134	const char *lituse;
5135
5136        if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5137	  {
5138	    x = XVECEXP (x, 0, 0);
5139	    lituse = "lituse_tlsgd";
5140	  }
5141	else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5142	  {
5143	    x = XVECEXP (x, 0, 0);
5144	    lituse = "lituse_tlsldm";
5145	  }
5146	else if (CONST_INT_P (x))
5147	  lituse = "lituse_jsr";
5148	else
5149	  {
5150	    output_operand_lossage ("invalid %%J value");
5151	    break;
5152	  }
5153
5154	if (x != const0_rtx)
5155	  fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5156      }
5157      break;
5158
5159    case 'j':
5160      {
5161	const char *lituse;
5162
5163#ifdef HAVE_AS_JSRDIRECT_RELOCS
5164	lituse = "lituse_jsrdirect";
5165#else
5166	lituse = "lituse_jsr";
5167#endif
5168
5169	gcc_assert (INTVAL (x) != 0);
5170	fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5171      }
5172      break;
5173    case 'r':
5174      /* If this operand is the constant zero, write it as "$31".  */
5175      if (REG_P (x))
5176	fprintf (file, "%s", reg_names[REGNO (x)]);
5177      else if (x == CONST0_RTX (GET_MODE (x)))
5178	fprintf (file, "$31");
5179      else
5180	output_operand_lossage ("invalid %%r value");
5181      break;
5182
5183    case 'R':
5184      /* Similar, but for floating-point.  */
5185      if (REG_P (x))
5186	fprintf (file, "%s", reg_names[REGNO (x)]);
5187      else if (x == CONST0_RTX (GET_MODE (x)))
5188	fprintf (file, "$f31");
5189      else
5190	output_operand_lossage ("invalid %%R value");
5191      break;
5192
5193    case 'N':
5194      /* Write the 1's complement of a constant.  */
5195      if (!CONST_INT_P (x))
5196	output_operand_lossage ("invalid %%N value");
5197
5198      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5199      break;
5200
5201    case 'P':
5202      /* Write 1 << C, for a constant C.  */
5203      if (!CONST_INT_P (x))
5204	output_operand_lossage ("invalid %%P value");
5205
5206      fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5207      break;
5208
5209    case 'h':
5210      /* Write the high-order 16 bits of a constant, sign-extended.  */
5211      if (!CONST_INT_P (x))
5212	output_operand_lossage ("invalid %%h value");
5213
5214      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5215      break;
5216
5217    case 'L':
5218      /* Write the low-order 16 bits of a constant, sign-extended.  */
5219      if (!CONST_INT_P (x))
5220	output_operand_lossage ("invalid %%L value");
5221
5222      fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5223	       (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5224      break;
5225
5226    case 'm':
5227      /* Write mask for ZAP insn.  */
5228      if (CONST_INT_P (x))
5229	{
5230	  HOST_WIDE_INT mask = 0, value = INTVAL (x);
5231
5232	  for (i = 0; i < 8; i++, value >>= 8)
5233	    if (value & 0xff)
5234	      mask |= (1 << i);
5235
5236	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5237	}
5238      else
5239	output_operand_lossage ("invalid %%m value");
5240      break;
5241
5242    case 'M':
5243      /* 'b', 'w', 'l', or 'q' as the value of the constant.  */
5244      if (!mode_width_operand (x, VOIDmode))
5245	output_operand_lossage ("invalid %%M value");
5246
5247      fprintf (file, "%s",
5248	       (INTVAL (x) == 8 ? "b"
5249		: INTVAL (x) == 16 ? "w"
5250		: INTVAL (x) == 32 ? "l"
5251		: "q"));
5252      break;
5253
5254    case 'U':
5255      /* Similar, except do it from the mask.  */
5256      if (CONST_INT_P (x))
5257	{
5258	  HOST_WIDE_INT value = INTVAL (x);
5259
5260	  if (value == 0xff)
5261	    {
5262	      fputc ('b', file);
5263	      break;
5264	    }
5265	  if (value == 0xffff)
5266	    {
5267	      fputc ('w', file);
5268	      break;
5269	    }
5270	  if (value == 0xffffffff)
5271	    {
5272	      fputc ('l', file);
5273	      break;
5274	    }
5275	  if (value == -1)
5276	    {
5277	      fputc ('q', file);
5278	      break;
5279	    }
5280	}
5281
5282      output_operand_lossage ("invalid %%U value");
5283      break;
5284
5285    case 's':
5286      /* Write the constant value divided by 8.  */
5287      if (!CONST_INT_P (x)
5288	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5289	  || (INTVAL (x) & 7) != 0)
5290	output_operand_lossage ("invalid %%s value");
5291
5292      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5293      break;
5294
5295    case 'S':
5296      /* Same, except compute (64 - c) / 8.  */
5297
5298      if (!CONST_INT_P (x)
5299	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5300	  || (INTVAL (x) & 7) != 0)
5301	output_operand_lossage ("invalid %%S value");
5302
5303      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5304      break;
5305
5306    case 'C': case 'D': case 'c': case 'd':
5307      /* Write out comparison name.  */
5308      {
5309	enum rtx_code c = GET_CODE (x);
5310
5311        if (!COMPARISON_P (x))
5312	  output_operand_lossage ("invalid %%C value");
5313
5314	else if (code == 'D')
5315	  c = reverse_condition (c);
5316	else if (code == 'c')
5317	  c = swap_condition (c);
5318	else if (code == 'd')
5319	  c = swap_condition (reverse_condition (c));
5320
5321        if (c == LEU)
5322	  fprintf (file, "ule");
5323        else if (c == LTU)
5324	  fprintf (file, "ult");
5325	else if (c == UNORDERED)
5326	  fprintf (file, "un");
5327        else
5328	  fprintf (file, "%s", GET_RTX_NAME (c));
5329      }
5330      break;
5331
5332    case 'E':
5333      /* Write the divide or modulus operator.  */
5334      switch (GET_CODE (x))
5335	{
5336	case DIV:
5337	  fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5338	  break;
5339	case UDIV:
5340	  fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5341	  break;
5342	case MOD:
5343	  fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5344	  break;
5345	case UMOD:
5346	  fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5347	  break;
5348	default:
5349	  output_operand_lossage ("invalid %%E value");
5350	  break;
5351	}
5352      break;
5353
5354    case 'A':
5355      /* Write "_u" for unaligned access.  */
5356      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5357	fprintf (file, "_u");
5358      break;
5359
5360    case 0:
5361      if (REG_P (x))
5362	fprintf (file, "%s", reg_names[REGNO (x)]);
5363      else if (MEM_P (x))
5364	output_address (GET_MODE (x), XEXP (x, 0));
5365      else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5366	{
5367	  switch (XINT (XEXP (x, 0), 1))
5368	    {
5369	    case UNSPEC_DTPREL:
5370	    case UNSPEC_TPREL:
5371	      output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5372	      break;
5373	    default:
5374	      output_operand_lossage ("unknown relocation unspec");
5375	      break;
5376	    }
5377	}
5378      else
5379	output_addr_const (file, x);
5380      break;
5381
5382    default:
5383      output_operand_lossage ("invalid %%xn code");
5384    }
5385}
5386
5387/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
5388
5389static void
5390alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
5391{
5392  int basereg = 31;
5393  HOST_WIDE_INT offset = 0;
5394
5395  if (GET_CODE (addr) == AND)
5396    addr = XEXP (addr, 0);
5397
5398  if (GET_CODE (addr) == PLUS
5399      && CONST_INT_P (XEXP (addr, 1)))
5400    {
5401      offset = INTVAL (XEXP (addr, 1));
5402      addr = XEXP (addr, 0);
5403    }
5404
5405  if (GET_CODE (addr) == LO_SUM)
5406    {
5407      const char *reloc16, *reloclo;
5408      rtx op1 = XEXP (addr, 1);
5409
5410      if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5411	{
5412	  op1 = XEXP (op1, 0);
5413	  switch (XINT (op1, 1))
5414	    {
5415	    case UNSPEC_DTPREL:
5416	      reloc16 = NULL;
5417	      reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5418	      break;
5419	    case UNSPEC_TPREL:
5420	      reloc16 = NULL;
5421	      reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5422	      break;
5423	    default:
5424	      output_operand_lossage ("unknown relocation unspec");
5425	      return;
5426	    }
5427
5428	  output_addr_const (file, XVECEXP (op1, 0, 0));
5429	}
5430      else
5431	{
5432	  reloc16 = "gprel";
5433	  reloclo = "gprellow";
5434	  output_addr_const (file, op1);
5435	}
5436
5437      if (offset)
5438	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5439
5440      addr = XEXP (addr, 0);
5441      switch (GET_CODE (addr))
5442	{
5443	case REG:
5444	  basereg = REGNO (addr);
5445	  break;
5446
5447	case SUBREG:
5448	  basereg = subreg_regno (addr);
5449	  break;
5450
5451	default:
5452	  gcc_unreachable ();
5453	}
5454
5455      fprintf (file, "($%d)\t\t!%s", basereg,
5456	       (basereg == 29 ? reloc16 : reloclo));
5457      return;
5458    }
5459
5460  switch (GET_CODE (addr))
5461    {
5462    case REG:
5463      basereg = REGNO (addr);
5464      break;
5465
5466    case SUBREG:
5467      basereg = subreg_regno (addr);
5468      break;
5469
5470    case CONST_INT:
5471      offset = INTVAL (addr);
5472      break;
5473
5474    case SYMBOL_REF:
5475      gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5476      fprintf (file, "%s", XSTR (addr, 0));
5477      return;
5478
5479    case CONST:
5480      gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5481      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5482		  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5483      fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5484	       XSTR (XEXP (XEXP (addr, 0), 0), 0),
5485	       INTVAL (XEXP (XEXP (addr, 0), 1)));
5486      return;
5487
5488    default:
5489      output_operand_lossage ("invalid operand address");
5490      return;
5491    }
5492
5493  fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5494}
5495
5496/* Emit RTL insns to initialize the variable parts of a trampoline at
5497   M_TRAMP.  FNDECL is target function's decl.  CHAIN_VALUE is an rtx
5498   for the static chain value for the function.  */
5499
5500static void
5501alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5502{
5503  rtx fnaddr, mem, word1, word2;
5504
5505  fnaddr = XEXP (DECL_RTL (fndecl), 0);
5506
5507#ifdef POINTERS_EXTEND_UNSIGNED
5508  fnaddr = convert_memory_address (Pmode, fnaddr);
5509  chain_value = convert_memory_address (Pmode, chain_value);
5510#endif
5511
5512  if (TARGET_ABI_OPEN_VMS)
5513    {
5514      const char *fnname;
5515      char *trname;
5516
5517      /* Construct the name of the trampoline entry point.  */
5518      fnname = XSTR (fnaddr, 0);
5519      trname = (char *) alloca (strlen (fnname) + 5);
5520      strcpy (trname, fnname);
5521      strcat (trname, "..tr");
5522      fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5523      word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5524
5525      /* Trampoline (or "bounded") procedure descriptor is constructed from
5526	 the function's procedure descriptor with certain fields zeroed, as the
5527	 VMS calling standard requires.  This is stored in the first quadword.  */
5528      word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5529      word1 = expand_and (DImode, word1,
5530			  GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5531			  NULL);
5532    }
5533  else
5534    {
5535      /* These 4 instructions are:
5536	    ldq $1,24($27)
5537	    ldq $27,16($27)
5538	    jmp $31,($27),0
5539	    nop
5540	 We don't bother setting the HINT field of the jump; the nop
5541	 is merely there for padding.  */
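      /* Each 64-bit constant below packs two little-endian instruction
	 words, with the earlier instruction in the low-order half.  */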
5542      word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5543      word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5544    }
5545
5546  /* Store the first two words, as computed above.  */
5547  mem = adjust_address (m_tramp, DImode, 0);
5548  emit_move_insn (mem, word1);
5549  mem = adjust_address (m_tramp, DImode, 8);
5550  emit_move_insn (mem, word2);
5551
5552  /* Store function address and static chain value.  */
5553  mem = adjust_address (m_tramp, Pmode, 16);
5554  emit_move_insn (mem, fnaddr);
5555  mem = adjust_address (m_tramp, Pmode, 24);
5556  emit_move_insn (mem, chain_value);
5557
5558  if (TARGET_ABI_OSF)
5559    {
5560      emit_insn (gen_imb ());
5561#ifdef HAVE_ENABLE_EXECUTE_STACK
5562      emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5563			 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5564#endif
5565    }
5566}
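
/* Illustration only (not compiled): on OSF the trampoline built above is a
   32-byte block whose layout mirrors the four stores just made.  A
   hypothetical struct describing it:

       struct osf_trampoline
       {
         unsigned long insns[2];   -- ldq $1,24($27); ldq $27,16($27);
                                      jmp $31,($27),0; nop
         void *fnaddr;             -- offset 16: target function address
         void *static_chain;       -- offset 24: value loaded into $1
       };

   On VMS the first quadword instead holds the masked procedure descriptor
   and the second quadword the "..tr" entry symbol.  */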
5567
5568/* Determine where to put an argument to a function.
5569   Value is zero to push the argument on the stack,
5570   or a hard register in which to store the argument.
5571
5572   MODE is the argument's machine mode.
5573   TYPE is the data type of the argument (as a tree).
5574    This is null for libcalls where that information may
5575    not be available.
5576   CUM is a variable of type CUMULATIVE_ARGS which gives info about
5577    the preceding args and about the function being called.
5578   NAMED is nonzero if this argument is a named parameter
5579    (otherwise it is an extra parameter matching an ellipsis).
5580
5581   On Alpha the first 6 words of args are normally in registers
5582   and the rest are pushed.  */
5583
5584static rtx
5585alpha_function_arg (cumulative_args_t cum_v, machine_mode mode,
5586		    const_tree type, bool named ATTRIBUTE_UNUSED)
5587{
5588  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5589  int basereg;
5590  int num_args;
5591
5592  /* Don't get confused and pass small structures in FP registers.  */
5593  if (type && AGGREGATE_TYPE_P (type))
5594    basereg = 16;
5595  else
5596    {
5597      /* With alpha_split_complex_arg, we shouldn't see any raw complex
5598	 values here.  */
5599      gcc_checking_assert (!COMPLEX_MODE_P (mode));
5600
5601      /* Set up defaults for FP operands passed in FP registers, and
5602	 integral operands passed in integer registers.  */
5603      if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5604	basereg = 32 + 16;
5605      else
5606	basereg = 16;
5607    }
5608
5609  /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5610     the two platforms, so we can't avoid conditional compilation.  */
5611#if TARGET_ABI_OPEN_VMS
5612    {
5613      if (mode == VOIDmode)
5614	return alpha_arg_info_reg_val (*cum);
5615
5616      num_args = cum->num_args;
5617      if (num_args >= 6
5618	  || targetm.calls.must_pass_in_stack (mode, type))
5619	return NULL_RTX;
5620    }
5621#elif TARGET_ABI_OSF
5622    {
5623      if (*cum >= 6)
5624	return NULL_RTX;
5625      num_args = *cum;
5626
5627      /* VOID is passed as a special flag for "last argument".  */
5628      if (type == void_type_node)
5629	basereg = 16;
5630      else if (targetm.calls.must_pass_in_stack (mode, type))
5631	return NULL_RTX;
5632    }
5633#else
5634#error Unhandled ABI
5635#endif
5636
5637  return gen_rtx_REG (mode, num_args + basereg);
5638}
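
/* For illustration (hypothetical example, not compiled): under the OSF ABI
   the code above assigns argument slots by position, so for

       double f (int a, double b, long c);

   A is passed in $16 (basereg 16, slot 0), B in $f17 (basereg 48, slot 1,
   assuming FP registers are enabled) and C in $18; an argument starting at
   slot 6 or beyond returns NULL_RTX and is pushed on the stack.  */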
5639
5640/* Update the data in CUM to advance over an argument
5641   of mode MODE and data type TYPE.
5642   (TYPE is null for libcalls where that information may not be available.)  */
5643
5644static void
5645alpha_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
5646			    const_tree type, bool named ATTRIBUTE_UNUSED)
5647{
5648  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5649  bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5650  int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type);
5651
5652#if TARGET_ABI_OSF
5653  *cum += increment;
5654#else
5655  if (!onstack && cum->num_args < 6)
5656    cum->atypes[cum->num_args] = alpha_arg_type (mode);
5657  cum->num_args += increment;
5658#endif
5659}
5660
5661static int
5662alpha_arg_partial_bytes (cumulative_args_t cum_v,
5663			 machine_mode mode ATTRIBUTE_UNUSED,
5664			 tree type ATTRIBUTE_UNUSED,
5665			 bool named ATTRIBUTE_UNUSED)
5666{
5667  int words = 0;
5668  CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5669
5670#if TARGET_ABI_OPEN_VMS
5671  if (cum->num_args < 6
5672      && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type))
5673    words = 6 - cum->num_args;
5674#elif TARGET_ABI_OSF
5675  if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type))
5676    words = 6 - *cum;
5677#else
5678#error Unhandled ABI
5679#endif
5680
5681  return words * UNITS_PER_WORD;
5682}
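
/* Worked example (illustration only): on OSF with *CUM == 5, a 16-byte
   argument satisfies 5 < 6 && 6 < 5 + 2, so WORDS == 1 and the function
   reports 8 bytes passed in the last argument register, the remainder
   going on the stack.  */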
5683
5684
5685/* Return true if TYPE must be returned in memory, instead of in registers.  */
5686
5687static bool
5688alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5689{
5690  machine_mode mode = VOIDmode;
5691  int size;
5692
5693  if (type)
5694    {
5695      mode = TYPE_MODE (type);
5696
5697      /* All aggregates are returned in memory, except on OpenVMS where
	 records that fit in 64 bits should be returned by immediate value
5699	 as required by section 3.8.7.1 of the OpenVMS Calling Standard.  */
5700      if (TARGET_ABI_OPEN_VMS
5701	  && TREE_CODE (type) != ARRAY_TYPE
	  && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 8)
5703	return false;
5704
5705      if (AGGREGATE_TYPE_P (type))
5706	return true;
5707    }
5708
5709  size = GET_MODE_SIZE (mode);
5710  switch (GET_MODE_CLASS (mode))
5711    {
5712    case MODE_VECTOR_FLOAT:
5713      /* Pass all float vectors in memory, like an aggregate.  */
5714      return true;
5715
5716    case MODE_COMPLEX_FLOAT:
5717      /* We judge complex floats on the size of their element,
5718	 not the size of the whole type.  */
5719      size = GET_MODE_UNIT_SIZE (mode);
5720      break;
5721
5722    case MODE_INT:
5723    case MODE_FLOAT:
5724    case MODE_COMPLEX_INT:
5725    case MODE_VECTOR_INT:
5726      break;
5727
5728    default:
5729      /* ??? We get called on all sorts of random stuff from
5730	 aggregate_value_p.  We must return something, but it's not
5731	 clear what's safe to return.  Pretend it's a struct I
5732	 guess.  */
5733      return true;
5734    }
5735
5736  /* Otherwise types must fit in one register.  */
5737  return size > UNITS_PER_WORD;
5738}
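
/* For example (illustration only): an 8-byte struct is returned in memory
   on OSF (it is an aggregate) but by value on OpenVMS (it fits in 64 bits);
   a _Complex float is judged by its 4-byte element size and is therefore
   returned in registers on both ABIs.  */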
5739
5740/* Return true if TYPE should be passed by invisible reference.  */
5741
5742static bool
5743alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5744			 machine_mode mode,
5745			 const_tree type ATTRIBUTE_UNUSED,
5746			 bool named)
5747{
  /* Pass float and _Complex float variable arguments by reference.
     This avoids a 64-bit store from an FP register to a pretend-args save
     area and a subsequent 32-bit load from the saved location to an FP
     register.

     Note that 32-bit loads and stores to/from an FP register on alpha
     reorder bits to form a canonical 64-bit value in the FP register.  This
     fact invalidates the compiler's assumption that a 32-bit FP value lives
     in the lower 32 bits of the passed 64-bit FP value, so loading the
     32-bit value from the stored 64-bit location with a 32-bit FP load is
     invalid on alpha.

     This introduces a sort of ABI incompatibility, but until _Float32 was
     introduced, C-family languages promoted a 32-bit float variable
     argument to a 64-bit double, and it was not possible to pass float as a
     variable argument at all.  Passing _Complex float as a variable
     argument never worked on alpha.  Thus, we have no backward
     compatibility issues to worry about, and passing unpromoted _Float32
     and _Complex float as a variable argument will actually work in the
     future.  */
5765
5766  if (mode == SFmode || mode == SCmode)
5767    return !named;
5768
5769  return mode == TFmode || mode == TCmode;
5770}
5771
5772/* Define how to find the value returned by a function.  VALTYPE is the
5773   data type of the value (as a tree).  If the precise function being
5774   called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5775   MODE is set instead of VALTYPE for libcalls.
5776
5777   On Alpha the value is found in $0 for integer functions and
5778   $f0 for floating-point functions.  */
5779
5780static rtx
5781alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5782			machine_mode mode)
5783{
5784  unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5785  enum mode_class mclass;
5786
5787  gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5788
5789  if (valtype)
5790    mode = TYPE_MODE (valtype);
5791
5792  mclass = GET_MODE_CLASS (mode);
5793  switch (mclass)
5794    {
5795    case MODE_INT:
5796      /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5797	 where we have them returning both SImode and DImode.  */
5798      if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5799        PROMOTE_MODE (mode, dummy, valtype);
5800      /* FALLTHRU */
5801
5802    case MODE_COMPLEX_INT:
5803    case MODE_VECTOR_INT:
5804      regnum = 0;
5805      break;
5806
5807    case MODE_FLOAT:
5808      regnum = 32;
5809      break;
5810
5811    case MODE_COMPLEX_FLOAT:
5812      {
5813	machine_mode cmode = GET_MODE_INNER (mode);
5814
5815	return gen_rtx_PARALLEL
5816	  (VOIDmode,
5817	   gen_rtvec (2,
5818		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5819				         const0_rtx),
5820		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5821				         GEN_INT (GET_MODE_SIZE (cmode)))));
5822      }
5823
5824    case MODE_RANDOM:
5825      /* We should only reach here for BLKmode on VMS.  */
5826      gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5827      regnum = 0;
5828      break;
5829
5830    default:
5831      gcc_unreachable ();
5832    }
5833
5834  return gen_rtx_REG (mode, regnum);
5835}
5836
5837/* Implement TARGET_FUNCTION_VALUE.  */
5838
5839static rtx
5840alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
5841		      bool /*outgoing*/)
5842{
5843  return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
5844}
5845
5846/* Implement TARGET_LIBCALL_VALUE.  */
5847
5848static rtx
5849alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
5850{
5851  return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
5852}
5853
5854/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
5855
   On the Alpha, $0, $1 and $f0, $f1 are the only registers thus used.  */
5857
5858static bool
5859alpha_function_value_regno_p (const unsigned int regno)
5860{
5861  return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
5862}
5863
5864/* TCmode complex values are passed by invisible reference.  We
5865   should not split these values.  */
5866
5867static bool
5868alpha_split_complex_arg (const_tree type)
5869{
5870  return TYPE_MODE (type) != TCmode;
5871}
5872
5873static tree
5874alpha_build_builtin_va_list (void)
5875{
5876  tree base, ofs, space, record, type_decl;
5877
5878  if (TARGET_ABI_OPEN_VMS)
5879    return ptr_type_node;
5880
5881  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5882  type_decl = build_decl (BUILTINS_LOCATION,
5883			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
5884  TYPE_STUB_DECL (record) = type_decl;
5885  TYPE_NAME (record) = type_decl;
5886
5887  /* C++? SET_IS_AGGR_TYPE (record, 1); */
5888
5889  /* Dummy field to prevent alignment warnings.  */
5890  space = build_decl (BUILTINS_LOCATION,
5891		      FIELD_DECL, NULL_TREE, integer_type_node);
5892  DECL_FIELD_CONTEXT (space) = record;
5893  DECL_ARTIFICIAL (space) = 1;
5894  DECL_IGNORED_P (space) = 1;
5895
5896  ofs = build_decl (BUILTINS_LOCATION,
5897		    FIELD_DECL, get_identifier ("__offset"),
5898		    integer_type_node);
5899  DECL_FIELD_CONTEXT (ofs) = record;
5900  DECL_CHAIN (ofs) = space;
5901
5902  base = build_decl (BUILTINS_LOCATION,
5903		     FIELD_DECL, get_identifier ("__base"),
5904		     ptr_type_node);
5905  DECL_FIELD_CONTEXT (base) = record;
5906  DECL_CHAIN (base) = ofs;
5907
5908  TYPE_FIELDS (record) = base;
5909  layout_type (record);
5910
5911  va_list_gpr_counter_field = ofs;
5912  return record;
5913}
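
/* Roughly, the record built above corresponds to the following C type
   (illustrative sketch only, not compiled; the names __base and __offset
   come from the code above, while the trailing unnamed padding field is
   shown here under the hypothetical name __pad):

       struct __va_list_tag
       {
         void *__base;      -- base address from which arguments are read
         int __offset;      -- byte offset of the next argument
         int __pad;         -- dummy field to avoid alignment warnings
       };
*/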
5914
5915#if TARGET_ABI_OSF
5916/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
5917   and constant additions.  */
5918
5919static gimple *
5920va_list_skip_additions (tree lhs)
5921{
5922  gimple  *stmt;
5923
5924  for (;;)
5925    {
5926      enum tree_code code;
5927
5928      stmt = SSA_NAME_DEF_STMT (lhs);
5929
5930      if (gimple_code (stmt) == GIMPLE_PHI)
5931	return stmt;
5932
5933      if (!is_gimple_assign (stmt)
5934	  || gimple_assign_lhs (stmt) != lhs)
5935	return NULL;
5936
5937      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5938	return stmt;
5939      code = gimple_assign_rhs_code (stmt);
5940      if (!CONVERT_EXPR_CODE_P (code)
5941	  && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5942	      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5943	      || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5944	return stmt;
5945
5946      lhs = gimple_assign_rhs1 (stmt);
5947    }
5948}
5949
5950/* Check if LHS = RHS statement is
5951   LHS = *(ap.__base + ap.__offset + cst)
5952   or
5953   LHS = *(ap.__base
5954	   + ((ap.__offset + cst <= 47)
5955	      ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5956   If the former, indicate that GPR registers are needed,
5957   if the latter, indicate that FPR registers are needed.
5958
5959   Also look for LHS = (*ptr).field, where ptr is one of the forms
5960   listed above.
5961
   On alpha, cfun->va_list_gpr_size is used as the size of the needed
   regs and cfun->va_list_fpr_size is a bitmask: bit 0 is set if GPR
   registers are needed and bit 1 is set if FPR registers are needed.
5965   Return true if va_list references should not be scanned for the
5966   current statement.  */
5967
5968static bool
5969alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
5970{
5971  tree base, offset, rhs;
5972  int offset_arg = 1;
5973  gimple *base_stmt;
5974
5975  if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5976      != GIMPLE_SINGLE_RHS)
5977    return false;
5978
5979  rhs = gimple_assign_rhs1 (stmt);
5980  while (handled_component_p (rhs))
5981    rhs = TREE_OPERAND (rhs, 0);
5982  if (TREE_CODE (rhs) != MEM_REF
5983      || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5984    return false;
5985
5986  stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5987  if (stmt == NULL
5988      || !is_gimple_assign (stmt)
5989      || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5990    return false;
5991
5992  base = gimple_assign_rhs1 (stmt);
5993  if (TREE_CODE (base) == SSA_NAME)
5994    {
5995      base_stmt = va_list_skip_additions (base);
5996      if (base_stmt
5997	  && is_gimple_assign (base_stmt)
5998	  && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5999	base = gimple_assign_rhs1 (base_stmt);
6000    }
6001
6002  if (TREE_CODE (base) != COMPONENT_REF
6003      || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6004    {
6005      base = gimple_assign_rhs2 (stmt);
6006      if (TREE_CODE (base) == SSA_NAME)
6007	{
6008	  base_stmt = va_list_skip_additions (base);
6009	  if (base_stmt
6010	      && is_gimple_assign (base_stmt)
6011	      && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6012	    base = gimple_assign_rhs1 (base_stmt);
6013	}
6014
6015      if (TREE_CODE (base) != COMPONENT_REF
6016	  || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6017	return false;
6018
6019      offset_arg = 0;
6020    }
6021
6022  base = get_base_address (base);
6023  if (TREE_CODE (base) != VAR_DECL
6024      || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
6025    return false;
6026
6027  offset = gimple_op (stmt, 1 + offset_arg);
6028  if (TREE_CODE (offset) == SSA_NAME)
6029    {
6030      gimple *offset_stmt = va_list_skip_additions (offset);
6031
6032      if (offset_stmt
6033	  && gimple_code (offset_stmt) == GIMPLE_PHI)
6034	{
6035	  HOST_WIDE_INT sub;
6036	  gimple *arg1_stmt, *arg2_stmt;
6037	  tree arg1, arg2;
6038	  enum tree_code code1, code2;
6039
6040	  if (gimple_phi_num_args (offset_stmt) != 2)
6041	    goto escapes;
6042
6043	  arg1_stmt
6044	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6045	  arg2_stmt
6046	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6047	  if (arg1_stmt == NULL
6048	      || !is_gimple_assign (arg1_stmt)
6049	      || arg2_stmt == NULL
6050	      || !is_gimple_assign (arg2_stmt))
6051	    goto escapes;
6052
6053	  code1 = gimple_assign_rhs_code (arg1_stmt);
6054	  code2 = gimple_assign_rhs_code (arg2_stmt);
6055	  if (code1 == COMPONENT_REF
6056	      && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6057	    /* Do nothing.  */;
6058	  else if (code2 == COMPONENT_REF
6059		   && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6060	    {
6061	      gimple *tem = arg1_stmt;
6062	      code2 = code1;
6063	      arg1_stmt = arg2_stmt;
6064	      arg2_stmt = tem;
6065	    }
6066	  else
6067	    goto escapes;
6068
6069	  if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6070	    goto escapes;
6071
6072	  sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6073	  if (code2 == MINUS_EXPR)
6074	    sub = -sub;
6075	  if (sub < -48 || sub > -32)
6076	    goto escapes;
6077
6078	  arg1 = gimple_assign_rhs1 (arg1_stmt);
6079	  arg2 = gimple_assign_rhs1 (arg2_stmt);
6080	  if (TREE_CODE (arg2) == SSA_NAME)
6081	    {
6082	      arg2_stmt = va_list_skip_additions (arg2);
6083	      if (arg2_stmt == NULL
6084		  || !is_gimple_assign (arg2_stmt)
6085		  || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6086		goto escapes;
6087	      arg2 = gimple_assign_rhs1 (arg2_stmt);
6088	    }
6089	  if (arg1 != arg2)
6090	    goto escapes;
6091
6092	  if (TREE_CODE (arg1) != COMPONENT_REF
6093	      || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6094	      || get_base_address (arg1) != base)
6095	    goto escapes;
6096
6097	  /* Need floating point regs.  */
6098	  cfun->va_list_fpr_size |= 2;
6099	  return false;
6100	}
6101      if (offset_stmt
6102	  && is_gimple_assign (offset_stmt)
6103	  && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6104	offset = gimple_assign_rhs1 (offset_stmt);
6105    }
6106  if (TREE_CODE (offset) != COMPONENT_REF
6107      || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6108      || get_base_address (offset) != base)
6109    goto escapes;
6110  else
6111    /* Need general regs.  */
6112    cfun->va_list_fpr_size |= 1;
6113  return false;
6114
6115escapes:
6116  si->va_list_escapes = true;
6117  return false;
6118}
6119#endif
6120
/* Perform any actions needed for a function that is receiving a
6122   variable number of arguments.  */
6123
6124static void
6125alpha_setup_incoming_varargs (cumulative_args_t pcum, machine_mode mode,
6126			      tree type, int *pretend_size, int no_rtl)
6127{
6128  CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6129
6130  /* Skip the current argument.  */
6131  targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6132				      true);
6133
6134#if TARGET_ABI_OPEN_VMS
6135  /* For VMS, we allocate space for all 6 arg registers plus a count.
6136
6137     However, if NO registers need to be saved, don't allocate any space.
6138     This is not only because we won't need the space, but because AP
6139     includes the current_pretend_args_size and we don't want to mess up
6140     any ap-relative addresses already made.  */
6141  if (cum.num_args < 6)
6142    {
6143      if (!no_rtl)
6144	{
6145	  emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6146	  emit_insn (gen_arg_home ());
6147	}
6148      *pretend_size = 7 * UNITS_PER_WORD;
6149    }
6150#else
6151  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6152     only push those that are remaining.  However, if NO registers need to
6153     be saved, don't allocate any space.  This is not only because we won't
6154     need the space, but because AP includes the current_pretend_args_size
6155     and we don't want to mess up any ap-relative addresses already made.
6156
6157     If we are not to use the floating-point registers, save the integer
6158     registers where we would put the floating-point registers.  This is
6159     not the most efficient way to implement varargs with just one register
6160     class, but it isn't worth doing anything more efficient in this rare
6161     case.  */
6162  if (cum >= 6)
6163    return;
6164
6165  if (!no_rtl)
6166    {
6167      int count;
6168      alias_set_type set = get_varargs_alias_set ();
6169      rtx tmp;
6170
6171      count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6172      if (count > 6 - cum)
6173	count = 6 - cum;
6174
6175      /* Detect whether integer registers or floating-point registers
6176	 are needed by the detected va_arg statements.  See above for
6177	 how these values are computed.  Note that the "escape" value
6178	 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6179	 these bits set.  */
6180      gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6181
6182      if (cfun->va_list_fpr_size & 1)
6183	{
6184	  tmp = gen_rtx_MEM (BLKmode,
6185			     plus_constant (Pmode, virtual_incoming_args_rtx,
6186					    (cum + 6) * UNITS_PER_WORD));
6187	  MEM_NOTRAP_P (tmp) = 1;
6188	  set_mem_alias_set (tmp, set);
6189	  move_block_from_reg (16 + cum, tmp, count);
6190	}
6191
6192      if (cfun->va_list_fpr_size & 2)
6193	{
6194	  tmp = gen_rtx_MEM (BLKmode,
6195			     plus_constant (Pmode, virtual_incoming_args_rtx,
6196					    cum * UNITS_PER_WORD));
6197	  MEM_NOTRAP_P (tmp) = 1;
6198	  set_mem_alias_set (tmp, set);
6199	  move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6200	}
6201     }
6202  *pretend_size = 12 * UNITS_PER_WORD;
6203#endif
6204}
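
/* Illustration only (not compiled): the 96-byte OSF pretend-args block
   reserved above is laid out as described in alpha_va_start below --

       bytes  0 .. 47   $f16 .. $f21   (stored only if bit 1 of
                                        cfun->va_list_fpr_size is set)
       bytes 48 .. 95   $16  .. $21    (stored only if bit 0 is set)

   and only the registers at or after the first anonymous argument slot
   (CUM) are actually written.  */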
6205
6206static void
6207alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6208{
6209  HOST_WIDE_INT offset;
6210  tree t, offset_field, base_field;
6211
6212  if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6213    return;
6214
6215  /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6216     up by 48, storing fp arg registers in the first 48 bytes, and the
6217     integer arg registers in the next 48 bytes.  This is only done,
6218     however, if any integer registers need to be stored.
6219
6220     If no integer registers need be stored, then we must subtract 48
6221     in order to account for the integer arg registers which are counted
6222     in argsize above, but which are not actually stored on the stack.
6223     Must further be careful here about structures straddling the last
6224     integer argument register; that futzes with pretend_args_size,
6225     which changes the meaning of AP.  */
6226
6227  if (NUM_ARGS < 6)
6228    offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6229  else
6230    offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6231
6232  if (TARGET_ABI_OPEN_VMS)
6233    {
6234      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6235      t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6236      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6237      TREE_SIDE_EFFECTS (t) = 1;
6238      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6239    }
6240  else
6241    {
6242      base_field = TYPE_FIELDS (TREE_TYPE (valist));
6243      offset_field = DECL_CHAIN (base_field);
6244
6245      base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6246			   valist, base_field, NULL_TREE);
6247      offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6248			     valist, offset_field, NULL_TREE);
6249
6250      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6251      t = fold_build_pointer_plus_hwi (t, offset);
6252      t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6253      TREE_SIDE_EFFECTS (t) = 1;
6254      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6255
6256      t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6257      t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6258      TREE_SIDE_EFFECTS (t) = 1;
6259      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6260    }
6261}
6262
6263static tree
6264alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6265			 gimple_seq *pre_p)
6266{
6267  tree type_size, ptr_type, addend, t, addr;
6268  gimple_seq internal_post;
6269
6270  /* If the type could not be passed in registers, skip the block
6271     reserved for the registers.  */
6272  if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6273    {
6274      t = build_int_cst (TREE_TYPE (offset), 6*8);
6275      gimplify_assign (offset,
6276		       build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6277		       pre_p);
6278    }
6279
6280  addend = offset;
6281  ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6282
6283  if (TREE_CODE (type) == COMPLEX_TYPE)
6284    {
6285      tree real_part, imag_part, real_temp;
6286
6287      real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6288					   offset, pre_p);
6289
6290      /* Copy the value into a new temporary, lest the formal temporary
6291	 be reused out from under us.  */
6292      real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6293
6294      imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6295					   offset, pre_p);
6296
6297      return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6298    }
6299  else if (TREE_CODE (type) == REAL_TYPE)
6300    {
6301      tree fpaddend, cond, fourtyeight;
6302
6303      fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6304      fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6305			      addend, fourtyeight);
6306      cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6307      addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6308			    fpaddend, addend);
6309    }
6310
6311  /* Build the final address and force that value into a temporary.  */
6312  addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6313  internal_post = NULL;
6314  gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6315  gimple_seq_add_seq (pre_p, internal_post);
6316
6317  /* Update the offset field.  */
6318  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6319  if (type_size == NULL || TREE_OVERFLOW (type_size))
6320    t = size_zero_node;
6321  else
6322    {
6323      t = size_binop (PLUS_EXPR, type_size, size_int (7));
6324      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6325      t = size_binop (MULT_EXPR, t, size_int (8));
6326    }
6327  t = fold_convert (TREE_TYPE (offset), t);
6328  gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6329      		   pre_p);
6330
6331  return build_va_arg_indirect_ref (addr);
6332}
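
/* For instance (illustration only): the size computation above rounds the
   argument size up to a multiple of 8, so a 12-byte argument advances the
   __offset field by 16 bytes.  */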
6333
6334static tree
6335alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6336		       gimple_seq *post_p)
6337{
6338  tree offset_field, base_field, offset, base, t, r;
6339  bool indirect;
6340
6341  if (TARGET_ABI_OPEN_VMS)
6342    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6343
6344  base_field = TYPE_FIELDS (va_list_type_node);
6345  offset_field = DECL_CHAIN (base_field);
6346  base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6347		       valist, base_field, NULL_TREE);
6348  offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6349			 valist, offset_field, NULL_TREE);
6350
6351  /* Pull the fields of the structure out into temporaries.  Since we never
6352     modify the base field, we can use a formal temporary.  Sign-extend the
6353     offset field so that it's the proper width for pointer arithmetic.  */
6354  base = get_formal_tmp_var (base_field, pre_p);
6355
6356  t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6357  offset = get_initialized_tmp_var (t, pre_p, NULL);
6358
6359  indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6360  if (indirect)
6361    type = build_pointer_type_for_mode (type, ptr_mode, true);
6362
6363  /* Find the value.  Note that this will be a stable indirection, or
6364     a composite of stable indirections in the case of complex.  */
6365  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6366
6367  /* Stuff the offset temporary back into its field.  */
6368  gimplify_assign (unshare_expr (offset_field),
6369		   fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6370
6371  if (indirect)
6372    r = build_va_arg_indirect_ref (r);
6373
6374  return r;
6375}
6376
6377/* Builtins.  */
6378
6379enum alpha_builtin
6380{
6381  ALPHA_BUILTIN_CMPBGE,
6382  ALPHA_BUILTIN_EXTBL,
6383  ALPHA_BUILTIN_EXTWL,
6384  ALPHA_BUILTIN_EXTLL,
6385  ALPHA_BUILTIN_EXTQL,
6386  ALPHA_BUILTIN_EXTWH,
6387  ALPHA_BUILTIN_EXTLH,
6388  ALPHA_BUILTIN_EXTQH,
6389  ALPHA_BUILTIN_INSBL,
6390  ALPHA_BUILTIN_INSWL,
6391  ALPHA_BUILTIN_INSLL,
6392  ALPHA_BUILTIN_INSQL,
6393  ALPHA_BUILTIN_INSWH,
6394  ALPHA_BUILTIN_INSLH,
6395  ALPHA_BUILTIN_INSQH,
6396  ALPHA_BUILTIN_MSKBL,
6397  ALPHA_BUILTIN_MSKWL,
6398  ALPHA_BUILTIN_MSKLL,
6399  ALPHA_BUILTIN_MSKQL,
6400  ALPHA_BUILTIN_MSKWH,
6401  ALPHA_BUILTIN_MSKLH,
6402  ALPHA_BUILTIN_MSKQH,
6403  ALPHA_BUILTIN_UMULH,
6404  ALPHA_BUILTIN_ZAP,
6405  ALPHA_BUILTIN_ZAPNOT,
6406  ALPHA_BUILTIN_AMASK,
6407  ALPHA_BUILTIN_IMPLVER,
6408  ALPHA_BUILTIN_RPCC,
6409  ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6410  ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6411
6412  /* TARGET_MAX */
6413  ALPHA_BUILTIN_MINUB8,
6414  ALPHA_BUILTIN_MINSB8,
6415  ALPHA_BUILTIN_MINUW4,
6416  ALPHA_BUILTIN_MINSW4,
6417  ALPHA_BUILTIN_MAXUB8,
6418  ALPHA_BUILTIN_MAXSB8,
6419  ALPHA_BUILTIN_MAXUW4,
6420  ALPHA_BUILTIN_MAXSW4,
6421  ALPHA_BUILTIN_PERR,
6422  ALPHA_BUILTIN_PKLB,
6423  ALPHA_BUILTIN_PKWB,
6424  ALPHA_BUILTIN_UNPKBL,
6425  ALPHA_BUILTIN_UNPKBW,
6426
6427  /* TARGET_CIX */
6428  ALPHA_BUILTIN_CTTZ,
6429  ALPHA_BUILTIN_CTLZ,
6430  ALPHA_BUILTIN_CTPOP,
6431
6432  ALPHA_BUILTIN_max
6433};
6434
6435static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6436  CODE_FOR_builtin_cmpbge,
6437  CODE_FOR_extbl,
6438  CODE_FOR_extwl,
6439  CODE_FOR_extll,
6440  CODE_FOR_extql,
6441  CODE_FOR_extwh,
6442  CODE_FOR_extlh,
6443  CODE_FOR_extqh,
6444  CODE_FOR_builtin_insbl,
6445  CODE_FOR_builtin_inswl,
6446  CODE_FOR_builtin_insll,
6447  CODE_FOR_insql,
6448  CODE_FOR_inswh,
6449  CODE_FOR_inslh,
6450  CODE_FOR_insqh,
6451  CODE_FOR_mskbl,
6452  CODE_FOR_mskwl,
6453  CODE_FOR_mskll,
6454  CODE_FOR_mskql,
6455  CODE_FOR_mskwh,
6456  CODE_FOR_msklh,
6457  CODE_FOR_mskqh,
6458  CODE_FOR_umuldi3_highpart,
6459  CODE_FOR_builtin_zap,
6460  CODE_FOR_builtin_zapnot,
6461  CODE_FOR_builtin_amask,
6462  CODE_FOR_builtin_implver,
6463  CODE_FOR_builtin_rpcc,
6464  CODE_FOR_builtin_establish_vms_condition_handler,
6465  CODE_FOR_builtin_revert_vms_condition_handler,
6466
6467  /* TARGET_MAX */
6468  CODE_FOR_builtin_minub8,
6469  CODE_FOR_builtin_minsb8,
6470  CODE_FOR_builtin_minuw4,
6471  CODE_FOR_builtin_minsw4,
6472  CODE_FOR_builtin_maxub8,
6473  CODE_FOR_builtin_maxsb8,
6474  CODE_FOR_builtin_maxuw4,
6475  CODE_FOR_builtin_maxsw4,
6476  CODE_FOR_builtin_perr,
6477  CODE_FOR_builtin_pklb,
6478  CODE_FOR_builtin_pkwb,
6479  CODE_FOR_builtin_unpkbl,
6480  CODE_FOR_builtin_unpkbw,
6481
6482  /* TARGET_CIX */
6483  CODE_FOR_ctzdi2,
6484  CODE_FOR_clzdi2,
6485  CODE_FOR_popcountdi2
6486};
6487
6488struct alpha_builtin_def
6489{
6490  const char *name;
6491  enum alpha_builtin code;
6492  unsigned int target_mask;
6493  bool is_const;
6494};
6495
6496static struct alpha_builtin_def const zero_arg_builtins[] = {
6497  { "__builtin_alpha_implver",	ALPHA_BUILTIN_IMPLVER,	0, true },
6498  { "__builtin_alpha_rpcc",	ALPHA_BUILTIN_RPCC,	0, false }
6499};
6500
6501static struct alpha_builtin_def const one_arg_builtins[] = {
6502  { "__builtin_alpha_amask",	ALPHA_BUILTIN_AMASK,	0, true },
6503  { "__builtin_alpha_pklb",	ALPHA_BUILTIN_PKLB,	MASK_MAX, true },
6504  { "__builtin_alpha_pkwb",	ALPHA_BUILTIN_PKWB,	MASK_MAX, true },
6505  { "__builtin_alpha_unpkbl",	ALPHA_BUILTIN_UNPKBL,	MASK_MAX, true },
6506  { "__builtin_alpha_unpkbw",	ALPHA_BUILTIN_UNPKBW,	MASK_MAX, true },
6507  { "__builtin_alpha_cttz",	ALPHA_BUILTIN_CTTZ,	MASK_CIX, true },
6508  { "__builtin_alpha_ctlz",	ALPHA_BUILTIN_CTLZ,	MASK_CIX, true },
6509  { "__builtin_alpha_ctpop",	ALPHA_BUILTIN_CTPOP,	MASK_CIX, true }
6510};
6511
6512static struct alpha_builtin_def const two_arg_builtins[] = {
6513  { "__builtin_alpha_cmpbge",	ALPHA_BUILTIN_CMPBGE,	0, true },
6514  { "__builtin_alpha_extbl",	ALPHA_BUILTIN_EXTBL,	0, true },
6515  { "__builtin_alpha_extwl",	ALPHA_BUILTIN_EXTWL,	0, true },
6516  { "__builtin_alpha_extll",	ALPHA_BUILTIN_EXTLL,	0, true },
6517  { "__builtin_alpha_extql",	ALPHA_BUILTIN_EXTQL,	0, true },
6518  { "__builtin_alpha_extwh",	ALPHA_BUILTIN_EXTWH,	0, true },
6519  { "__builtin_alpha_extlh",	ALPHA_BUILTIN_EXTLH,	0, true },
6520  { "__builtin_alpha_extqh",	ALPHA_BUILTIN_EXTQH,	0, true },
6521  { "__builtin_alpha_insbl",	ALPHA_BUILTIN_INSBL,	0, true },
6522  { "__builtin_alpha_inswl",	ALPHA_BUILTIN_INSWL,	0, true },
6523  { "__builtin_alpha_insll",	ALPHA_BUILTIN_INSLL,	0, true },
6524  { "__builtin_alpha_insql",	ALPHA_BUILTIN_INSQL,	0, true },
6525  { "__builtin_alpha_inswh",	ALPHA_BUILTIN_INSWH,	0, true },
6526  { "__builtin_alpha_inslh",	ALPHA_BUILTIN_INSLH,	0, true },
6527  { "__builtin_alpha_insqh",	ALPHA_BUILTIN_INSQH,	0, true },
6528  { "__builtin_alpha_mskbl",	ALPHA_BUILTIN_MSKBL,	0, true },
6529  { "__builtin_alpha_mskwl",	ALPHA_BUILTIN_MSKWL,	0, true },
6530  { "__builtin_alpha_mskll",	ALPHA_BUILTIN_MSKLL,	0, true },
6531  { "__builtin_alpha_mskql",	ALPHA_BUILTIN_MSKQL,	0, true },
6532  { "__builtin_alpha_mskwh",	ALPHA_BUILTIN_MSKWH,	0, true },
6533  { "__builtin_alpha_msklh",	ALPHA_BUILTIN_MSKLH,	0, true },
6534  { "__builtin_alpha_mskqh",	ALPHA_BUILTIN_MSKQH,	0, true },
6535  { "__builtin_alpha_umulh",	ALPHA_BUILTIN_UMULH,	0, true },
6536  { "__builtin_alpha_zap",	ALPHA_BUILTIN_ZAP,	0, true },
6537  { "__builtin_alpha_zapnot",	ALPHA_BUILTIN_ZAPNOT,	0, true },
6538  { "__builtin_alpha_minub8",	ALPHA_BUILTIN_MINUB8,	MASK_MAX, true },
6539  { "__builtin_alpha_minsb8",	ALPHA_BUILTIN_MINSB8,	MASK_MAX, true },
6540  { "__builtin_alpha_minuw4",	ALPHA_BUILTIN_MINUW4,	MASK_MAX, true },
6541  { "__builtin_alpha_minsw4",	ALPHA_BUILTIN_MINSW4,	MASK_MAX, true },
6542  { "__builtin_alpha_maxub8",	ALPHA_BUILTIN_MAXUB8,	MASK_MAX, true },
6543  { "__builtin_alpha_maxsb8",	ALPHA_BUILTIN_MAXSB8,	MASK_MAX, true },
6544  { "__builtin_alpha_maxuw4",	ALPHA_BUILTIN_MAXUW4,	MASK_MAX, true },
6545  { "__builtin_alpha_maxsw4",	ALPHA_BUILTIN_MAXSW4,	MASK_MAX, true },
6546  { "__builtin_alpha_perr",	ALPHA_BUILTIN_PERR,	MASK_MAX, true }
6547};
6548
6549static GTY(()) tree alpha_dimode_u;
6550static GTY(()) tree alpha_v8qi_u;
6551static GTY(()) tree alpha_v8qi_s;
6552static GTY(()) tree alpha_v4hi_u;
6553static GTY(()) tree alpha_v4hi_s;
6554
6555static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6556
6557/* Return the alpha builtin for CODE.  */
6558
6559static tree
6560alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6561{
6562  if (code >= ALPHA_BUILTIN_max)
6563    return error_mark_node;
6564  return alpha_builtins[code];
6565}
6566
6567/* Helper function of alpha_init_builtins.  Add the built-in specified
6568   by NAME, TYPE, CODE, and ECF.  */
6569
6570static void
6571alpha_builtin_function (const char *name, tree ftype,
6572			enum alpha_builtin code, unsigned ecf)
6573{
6574  tree decl = add_builtin_function (name, ftype, (int) code,
6575				    BUILT_IN_MD, NULL, NULL_TREE);
6576
6577  if (ecf & ECF_CONST)
6578    TREE_READONLY (decl) = 1;
6579  if (ecf & ECF_NOTHROW)
6580    TREE_NOTHROW (decl) = 1;
6581
6582  alpha_builtins [(int) code] = decl;
6583}
6584
6585/* Helper function of alpha_init_builtins.  Add the COUNT built-in
6586   functions pointed to by P, with function type FTYPE.  */
6587
6588static void
6589alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6590		    tree ftype)
6591{
6592  size_t i;
6593
6594  for (i = 0; i < count; ++i, ++p)
6595    if ((target_flags & p->target_mask) == p->target_mask)
6596      alpha_builtin_function (p->name, ftype, p->code,
6597			      (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6598}
6599
6600static void
6601alpha_init_builtins (void)
6602{
6603  tree ftype;
6604
6605  alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6606  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6607  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6608  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6609  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6610
6611  ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6612  alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6613
6614  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6615  alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6616
6617  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6618				    alpha_dimode_u, NULL_TREE);
6619  alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6620
6621  if (TARGET_ABI_OPEN_VMS)
6622    {
6623      ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6624					NULL_TREE);
6625      alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6626			      ftype,
6627			      ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6628			      0);
6629
6630      ftype = build_function_type_list (ptr_type_node, void_type_node,
6631					NULL_TREE);
6632      alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6633			      ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6634
6635      vms_patch_builtins ();
6636    }
6637}
6638
6639/* Expand an expression EXP that calls a built-in function,
6640   with result going to TARGET if that's convenient
6641   (and in mode MODE if that's convenient).
6642   SUBTARGET may be used as the target for computing one of EXP's operands.
6643   IGNORE is nonzero if the value is to be ignored.  */
6644
6645static rtx
6646alpha_expand_builtin (tree exp, rtx target,
6647		      rtx subtarget ATTRIBUTE_UNUSED,
6648		      machine_mode mode ATTRIBUTE_UNUSED,
6649		      int ignore ATTRIBUTE_UNUSED)
6650{
6651#define MAX_ARGS 2
6652
6653  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6654  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6655  tree arg;
6656  call_expr_arg_iterator iter;
6657  enum insn_code icode;
6658  rtx op[MAX_ARGS], pat;
6659  int arity;
6660  bool nonvoid;
6661
6662  if (fcode >= ALPHA_BUILTIN_max)
6663    internal_error ("bad builtin fcode");
6664  icode = code_for_builtin[fcode];
6665  if (icode == 0)
6666    internal_error ("bad builtin fcode");
6667
6668  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6669
6670  arity = 0;
6671  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6672    {
6673      const struct insn_operand_data *insn_op;
6674
6675      if (arg == error_mark_node)
6676	return NULL_RTX;
      if (arity >= MAX_ARGS)
6678	return NULL_RTX;
6679
6680      insn_op = &insn_data[icode].operand[arity + nonvoid];
6681
6682      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6683
6684      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6685	op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6686      arity++;
6687    }
6688
6689  if (nonvoid)
6690    {
6691      machine_mode tmode = insn_data[icode].operand[0].mode;
6692      if (!target
6693	  || GET_MODE (target) != tmode
6694	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6695	target = gen_reg_rtx (tmode);
6696    }
6697
6698  switch (arity)
6699    {
6700    case 0:
6701      pat = GEN_FCN (icode) (target);
6702      break;
6703    case 1:
6704      if (nonvoid)
6705        pat = GEN_FCN (icode) (target, op[0]);
6706      else
6707	pat = GEN_FCN (icode) (op[0]);
6708      break;
6709    case 2:
6710      pat = GEN_FCN (icode) (target, op[0], op[1]);
6711      break;
6712    default:
6713      gcc_unreachable ();
6714    }
6715  if (!pat)
6716    return NULL_RTX;
6717  emit_insn (pat);
6718
6719  if (nonvoid)
6720    return target;
6721  else
6722    return const0_rtx;
6723}
6724
6725/* Fold the builtin for the CMPBGE instruction.  This is a vector comparison
6726   with an 8-bit output vector.  OPINT contains the integer operands; bit N
6727   of OP_CONST is set if OPINT[N] is valid.  */
6728
6729static tree
6730alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6731{
6732  if (op_const == 3)
6733    {
6734      int i, val;
6735      for (i = 0, val = 0; i < 8; ++i)
6736	{
6737	  unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6738	  unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6739	  if (c0 >= c1)
6740	    val |= 1 << i;
6741	}
6742      return build_int_cst (alpha_dimode_u, val);
6743    }
6744  else if (op_const == 2 && opint[1] == 0)
6745    return build_int_cst (alpha_dimode_u, 0xff);
6746  return NULL;
6747}
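
/* Worked example (illustration only): CMPBGE compares corresponding bytes
   unsigned, setting bit I of the result when byte I of the first operand
   is >= byte I of the second.  Thus

       __builtin_alpha_cmpbge (0x00ff00ff00ff00ffUL, 0x0101010101010101UL)

   folds to 0x55, and a known-zero second operand always folds to 0xff.  */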
6748
6749/* Fold the builtin for the ZAPNOT instruction.  This is essentially a
6750   specialized form of an AND operation.  Other byte manipulation instructions
6751   are defined in terms of this instruction, so this is also used as a
6752   subroutine for other builtins.
6753
6754   OP contains the tree operands; OPINT contains the extracted integer values.
6755   Bit N of OP_CONST it set if OPINT[N] is valid.  OP may be null if only
   Bit N of OP_CONST is set if OPINT[N] is valid.  OP may be null if only
6757
6758static tree
6759alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6760			   long op_const)
6761{
6762  if (op_const & 2)
6763    {
6764      unsigned HOST_WIDE_INT mask = 0;
6765      int i;
6766
6767      for (i = 0; i < 8; ++i)
6768	if ((opint[1] >> i) & 1)
6769	  mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6770
6771      if (op_const & 1)
6772	return build_int_cst (alpha_dimode_u, opint[0] & mask);
6773
6774      if (op)
6775	return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6776			    build_int_cst (alpha_dimode_u, mask));
6777    }
6778  else if ((op_const & 1) && opint[0] == 0)
6779    return build_int_cst (alpha_dimode_u, 0);
6780  return NULL;
6781}
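
/* For example (illustration only): bit I of the second operand selects
   byte I of the first, so with a constant mask the call above folds to a
   plain AND:

       __builtin_alpha_zapnot (x, 0x0f)  ==  x & 0x00000000ffffffffUL

   __builtin_alpha_zap is reduced to this form by complementing the byte
   mask first (see alpha_fold_builtin below).  */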
6782
6783/* Fold the builtins for the EXT family of instructions.  */
6784
6785static tree
6786alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6787			  long op_const, unsigned HOST_WIDE_INT bytemask,
6788			  bool is_high)
6789{
6790  long zap_const = 2;
6791  tree *zap_op = NULL;
6792
6793  if (op_const & 2)
6794    {
6795      unsigned HOST_WIDE_INT loc;
6796
6797      loc = opint[1] & 7;
6798      loc *= BITS_PER_UNIT;
6799
6800      if (loc != 0)
6801	{
6802	  if (op_const & 1)
6803	    {
6804	      unsigned HOST_WIDE_INT temp = opint[0];
6805	      if (is_high)
6806		temp <<= loc;
6807	      else
6808		temp >>= loc;
6809	      opint[0] = temp;
6810	      zap_const = 3;
6811	    }
6812	}
6813      else
6814	zap_op = op;
6815    }
6816
6817  opint[1] = bytemask;
6818  return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6819}
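
/* For example (illustration only): the low-part EXT builtins extract a
   right-aligned field whose position is given by the low three bits of the
   second operand, e.g.

       __builtin_alpha_extbl (x, 3)  ==  (x >> 24) & 0xff
       __builtin_alpha_extwl (x, 2)  ==  (x >> 16) & 0xffff

   while the high forms shift left instead, as IS_HIGH selects above.  */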
6820
6821/* Fold the builtins for the INS family of instructions.  */
6822
6823static tree
6824alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6825			  long op_const, unsigned HOST_WIDE_INT bytemask,
6826			  bool is_high)
6827{
6828  if ((op_const & 1) && opint[0] == 0)
6829    return build_int_cst (alpha_dimode_u, 0);
6830
6831  if (op_const & 2)
6832    {
6833      unsigned HOST_WIDE_INT temp, loc, byteloc;
6834      tree *zap_op = NULL;
6835
6836      loc = opint[1] & 7;
6837      bytemask <<= loc;
6838
6839      temp = opint[0];
6840      if (is_high)
6841	{
6842	  byteloc = (64 - (loc * 8)) & 0x3f;
6843	  if (byteloc == 0)
6844	    zap_op = op;
6845	  else
6846	    temp >>= byteloc;
6847	  bytemask >>= 8;
6848	}
6849      else
6850	{
6851	  byteloc = loc * 8;
6852	  if (byteloc == 0)
6853	    zap_op = op;
6854	  else
6855	    temp <<= byteloc;
6856	}
6857
6858      opint[0] = temp;
6859      opint[1] = bytemask;
6860      return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6861    }
6862
6863  return NULL;
6864}
6865
6866static tree
6867alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6868			  long op_const, unsigned HOST_WIDE_INT bytemask,
6869			  bool is_high)
6870{
6871  if (op_const & 2)
6872    {
6873      unsigned HOST_WIDE_INT loc;
6874
6875      loc = opint[1] & 7;
6876      bytemask <<= loc;
6877
6878      if (is_high)
6879	bytemask >>= 8;
6880
6881      opint[1] = bytemask ^ 0xff;
6882    }
6883
6884  return alpha_fold_builtin_zapnot (op, opint, op_const);
6885}
6886
6887static tree
6888alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6889{
6890  tree op0 = fold_convert (vtype, op[0]);
6891  tree op1 = fold_convert (vtype, op[1]);
6892  tree val = fold_build2 (code, vtype, op0, op1);
6893  return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6894}
6895
6896static tree
6897alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6898{
6899  unsigned HOST_WIDE_INT temp = 0;
6900  int i;
6901
6902  if (op_const != 3)
6903    return NULL;
6904
6905  for (i = 0; i < 8; ++i)
6906    {
6907      unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6908      unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6909      if (a >= b)
6910	temp += a - b;
6911      else
6912	temp += b - a;
6913    }
6914
6915  return build_int_cst (alpha_dimode_u, temp);
6916}
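
/* Worked example (illustration only): PERR sums the absolute differences
   of the eight byte pairs, so constant operands 0x0500 and 0x0007 fold to
   |0x00 - 0x07| + |0x05 - 0x00| == 12.  */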
6917
6918static tree
6919alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6920{
6921  unsigned HOST_WIDE_INT temp;
6922
6923  if (op_const == 0)
6924    return NULL;
6925
6926  temp = opint[0] & 0xff;
6927  temp |= (opint[0] >> 24) & 0xff00;
6928
6929  return build_int_cst (alpha_dimode_u, temp);
6930}
6931
6932static tree
6933alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6934{
6935  unsigned HOST_WIDE_INT temp;
6936
6937  if (op_const == 0)
6938    return NULL;
6939
6940  temp = opint[0] & 0xff;
6941  temp |= (opint[0] >>  8) & 0xff00;
6942  temp |= (opint[0] >> 16) & 0xff0000;
6943  temp |= (opint[0] >> 24) & 0xff000000;
6944
6945  return build_int_cst (alpha_dimode_u, temp);
6946}
6947
6948static tree
6949alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6950{
6951  unsigned HOST_WIDE_INT temp;
6952
6953  if (op_const == 0)
6954    return NULL;
6955
6956  temp = opint[0] & 0xff;
6957  temp |= (opint[0] & 0xff00) << 24;
6958
6959  return build_int_cst (alpha_dimode_u, temp);
6960}
6961
6962static tree
6963alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6964{
6965  unsigned HOST_WIDE_INT temp;
6966
6967  if (op_const == 0)
6968    return NULL;
6969
6970  temp = opint[0] & 0xff;
6971  temp |= (opint[0] & 0x0000ff00) << 8;
6972  temp |= (opint[0] & 0x00ff0000) << 16;
6973  temp |= (opint[0] & 0xff000000) << 24;
6974
6975  return build_int_cst (alpha_dimode_u, temp);
6976}
6977
6978static tree
6979alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6980{
6981  unsigned HOST_WIDE_INT temp;
6982
6983  if (op_const == 0)
6984    return NULL;
6985
6986  if (opint[0] == 0)
6987    temp = 64;
6988  else
6989    temp = exact_log2 (opint[0] & -opint[0]);
6990
6991  return build_int_cst (alpha_dimode_u, temp);
6992}
6993
6994static tree
6995alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6996{
6997  unsigned HOST_WIDE_INT temp;
6998
6999  if (op_const == 0)
7000    return NULL;
7001
7002  if (opint[0] == 0)
7003    temp = 64;
7004  else
7005    temp = 64 - floor_log2 (opint[0]) - 1;
7006
7007  return build_int_cst (alpha_dimode_u, temp);
7008}
7009
7010static tree
7011alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7012{
7013  unsigned HOST_WIDE_INT temp, op;
7014
7015  if (op_const == 0)
7016    return NULL;
7017
7018  op = opint[0];
7019  temp = 0;
7020  while (op)
7021    temp++, op &= op - 1;
7022
7023  return build_int_cst (alpha_dimode_u, temp);
7024}
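
/* Worked example (illustration only): for a constant operand of 0x1000 the
   three folders above yield cttz == 12, ctlz == 51 and ctpop == 1; a zero
   operand yields 64 for cttz and ctlz but 0 for ctpop.  */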
7025
7026/* Fold one of our builtin functions.  */
7027
7028static tree
7029alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7030		    bool ignore ATTRIBUTE_UNUSED)
7031{
7032  unsigned HOST_WIDE_INT opint[MAX_ARGS];
7033  long op_const = 0;
7034  int i;
7035
7036  if (n_args > MAX_ARGS)
7037    return NULL;
7038
7039  for (i = 0; i < n_args; i++)
7040    {
7041      tree arg = op[i];
7042      if (arg == error_mark_node)
7043	return NULL;
7044
7045      opint[i] = 0;
7046      if (TREE_CODE (arg) == INTEGER_CST)
7047	{
7048          op_const |= 1L << i;
7049	  opint[i] = int_cst_value (arg);
7050	}
7051    }
7052
7053  switch (DECL_FUNCTION_CODE (fndecl))
7054    {
7055    case ALPHA_BUILTIN_CMPBGE:
7056      return alpha_fold_builtin_cmpbge (opint, op_const);
7057
7058    case ALPHA_BUILTIN_EXTBL:
7059      return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7060    case ALPHA_BUILTIN_EXTWL:
7061      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7062    case ALPHA_BUILTIN_EXTLL:
7063      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7064    case ALPHA_BUILTIN_EXTQL:
7065      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7066    case ALPHA_BUILTIN_EXTWH:
7067      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7068    case ALPHA_BUILTIN_EXTLH:
7069      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7070    case ALPHA_BUILTIN_EXTQH:
7071      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7072
7073    case ALPHA_BUILTIN_INSBL:
7074      return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7075    case ALPHA_BUILTIN_INSWL:
7076      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7077    case ALPHA_BUILTIN_INSLL:
7078      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7079    case ALPHA_BUILTIN_INSQL:
7080      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7081    case ALPHA_BUILTIN_INSWH:
7082      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7083    case ALPHA_BUILTIN_INSLH:
7084      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7085    case ALPHA_BUILTIN_INSQH:
7086      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7087
7088    case ALPHA_BUILTIN_MSKBL:
7089      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7090    case ALPHA_BUILTIN_MSKWL:
7091      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7092    case ALPHA_BUILTIN_MSKLL:
7093      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7094    case ALPHA_BUILTIN_MSKQL:
7095      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7096    case ALPHA_BUILTIN_MSKWH:
7097      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7098    case ALPHA_BUILTIN_MSKLH:
7099      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7100    case ALPHA_BUILTIN_MSKQH:
7101      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7102
7103    case ALPHA_BUILTIN_ZAP:
7104      opint[1] ^= 0xff;
7105      /* FALLTHRU */
7106    case ALPHA_BUILTIN_ZAPNOT:
7107      return alpha_fold_builtin_zapnot (op, opint, op_const);
7108
7109    case ALPHA_BUILTIN_MINUB8:
7110      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7111    case ALPHA_BUILTIN_MINSB8:
7112      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7113    case ALPHA_BUILTIN_MINUW4:
7114      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7115    case ALPHA_BUILTIN_MINSW4:
7116      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7117    case ALPHA_BUILTIN_MAXUB8:
7118      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7119    case ALPHA_BUILTIN_MAXSB8:
7120      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7121    case ALPHA_BUILTIN_MAXUW4:
7122      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7123    case ALPHA_BUILTIN_MAXSW4:
7124      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7125
7126    case ALPHA_BUILTIN_PERR:
7127      return alpha_fold_builtin_perr (opint, op_const);
7128    case ALPHA_BUILTIN_PKLB:
7129      return alpha_fold_builtin_pklb (opint, op_const);
7130    case ALPHA_BUILTIN_PKWB:
7131      return alpha_fold_builtin_pkwb (opint, op_const);
7132    case ALPHA_BUILTIN_UNPKBL:
7133      return alpha_fold_builtin_unpkbl (opint, op_const);
7134    case ALPHA_BUILTIN_UNPKBW:
7135      return alpha_fold_builtin_unpkbw (opint, op_const);
7136
7137    case ALPHA_BUILTIN_CTTZ:
7138      return alpha_fold_builtin_cttz (opint, op_const);
7139    case ALPHA_BUILTIN_CTLZ:
7140      return alpha_fold_builtin_ctlz (opint, op_const);
7141    case ALPHA_BUILTIN_CTPOP:
7142      return alpha_fold_builtin_ctpop (opint, op_const);
7143
7144    case ALPHA_BUILTIN_AMASK:
7145    case ALPHA_BUILTIN_IMPLVER:
7146    case ALPHA_BUILTIN_RPCC:
7147      /* None of these are foldable at compile-time.  */
7148    default:
7149      return NULL;
7150    }
7151}
7152
7153bool
7154alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7155{
7156  bool changed = false;
7157  gimple *stmt = gsi_stmt (*gsi);
7158  tree call = gimple_call_fn (stmt);
7159  gimple *new_stmt = NULL;
7160
7161  if (call)
7162    {
7163      tree fndecl = gimple_call_fndecl (stmt);
7164
7165      if (fndecl)
7166	{
7167	  tree arg0, arg1;
7168
7169	  switch (DECL_FUNCTION_CODE (fndecl))
7170	    {
7171	    case ALPHA_BUILTIN_UMULH:
7172	      arg0 = gimple_call_arg (stmt, 0);
7173	      arg1 = gimple_call_arg (stmt, 1);
7174
7175	      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7176					      MULT_HIGHPART_EXPR, arg0, arg1);
7177	      break;
7178	    default:
7179	      break;
7180	    }
7181	}
7182    }
7183
7184  if (new_stmt)
7185    {
7186      gsi_replace (gsi, new_stmt, true);
7187      changed = true;
7188    }
7189
7190  return changed;
7191}
7192
7193/* This page contains routines that are used to determine what the function
7194   prologue and epilogue code will do and write them out.  */
7195
7196/* Compute the size of the save area in the stack.  */
7197
7198/* These variables are used for communication between the following functions.
7199   They indicate various things about the current function being compiled
7200   that are used to tell what kind of prologue, epilogue and procedure
7201   descriptor to generate.  */
7202
/* The kind of procedure (null, register or stack) needed for the current
   function.  */
7204enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7205static enum alpha_procedure_types alpha_procedure_type;
7206
7207/* Register number (either FP or SP) that is used to unwind the frame.  */
7208static int vms_unwind_regno;
7209
7210/* Register number used to save FP.  We need not have one for RA since
7211   we don't modify it for register procedures.  This is only defined
7212   for register frame procedures.  */
7213static int vms_save_fp_regno;
7214
7215/* Register number used to reference objects off our PV.  */
7216static int vms_base_regno;
7217
7218/* Compute register masks for saved registers.  */
7219
7220static void
7221alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7222{
7223  unsigned long imask = 0;
7224  unsigned long fmask = 0;
7225  unsigned int i;
7226
7227  /* When outputting a thunk, we don't have valid register life info,
7228     but assemble_start_function wants to output .frame and .mask
7229     directives.  */
7230  if (cfun->is_thunk)
7231    {
7232      *imaskP = 0;
7233      *fmaskP = 0;
7234      return;
7235    }
7236
7237  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7238    imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7239
7240  /* One for every register we have to save.  */
7241  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7242    if (! fixed_regs[i] && ! call_used_regs[i]
7243	&& df_regs_ever_live_p (i) && i != REG_RA)
7244      {
7245	if (i < 32)
7246	  imask |= (1UL << i);
7247	else
7248	  fmask |= (1UL << (i - 32));
7249      }
7250
7251  /* We need to restore these for the handler.  */
7252  if (crtl->calls_eh_return)
7253    {
7254      for (i = 0; ; ++i)
7255	{
7256	  unsigned regno = EH_RETURN_DATA_REGNO (i);
7257	  if (regno == INVALID_REGNUM)
7258	    break;
7259	  imask |= 1UL << regno;
7260	}
7261    }
7262
  /* If any register is spilled, then spill the return address also.  */
7264  /* ??? This is required by the Digital stack unwind specification
7265     and isn't needed if we're doing Dwarf2 unwinding.  */
7266  if (imask || fmask || alpha_ra_ever_killed ())
7267    imask |= (1UL << REG_RA);
7268
7269  *imaskP = imask;
7270  *fmaskP = fmask;
7271}
7272
7273int
7274alpha_sa_size (void)
7275{
7276  unsigned long mask[2];
7277  int sa_size = 0;
7278  int i, j;
7279
7280  alpha_sa_mask (&mask[0], &mask[1]);
7281
7282  for (j = 0; j < 2; ++j)
7283    for (i = 0; i < 32; ++i)
7284      if ((mask[j] >> i) & 1)
7285	sa_size++;
7286
7287  if (TARGET_ABI_OPEN_VMS)
7288    {
7289      /* Start with a stack procedure if we make any calls (REG_RA used), or
7290	 need a frame pointer, with a register procedure if we otherwise need
7291	 at least a slot, and with a null procedure in other cases.  */
7292      if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7293	alpha_procedure_type = PT_STACK;
7294      else if (get_frame_size() != 0)
7295	alpha_procedure_type = PT_REGISTER;
7296      else
7297	alpha_procedure_type = PT_NULL;
7298
7299      /* Don't reserve space for saving FP & RA yet.  Do that later after we've
7300	 made the final decision on stack procedure vs register procedure.  */
7301      if (alpha_procedure_type == PT_STACK)
7302	sa_size -= 2;
7303
7304      /* Decide whether to refer to objects off our PV via FP or PV.
7305	 If we need FP for something else or if we receive a nonlocal
7306	 goto (which expects PV to contain the value), we must use PV.
7307	 Otherwise, start by assuming we can use FP.  */
7308
7309      vms_base_regno
7310	= (frame_pointer_needed
7311	   || cfun->has_nonlocal_label
7312	   || alpha_procedure_type == PT_STACK
7313	   || crtl->outgoing_args_size)
7314	  ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7315
7316      /* If we want to copy PV into FP, we need to find some register
7317	 in which to save FP.  */
7318
7319      vms_save_fp_regno = -1;
7320      if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7321	for (i = 0; i < 32; i++)
7322	  if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7323	    vms_save_fp_regno = i;
7324
7325      /* A VMS condition handler requires a stack procedure in our
7326	 implementation, though the calling standard does not require one.  */
7327      if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7328	  || cfun->machine->uses_condition_handler)
7329	vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7330      else if (alpha_procedure_type == PT_NULL)
7331	vms_base_regno = REG_PV;
7332
7333      /* Stack unwinding should be done via FP unless we use it for PV.  */
7334      vms_unwind_regno = (vms_base_regno == REG_PV
7335			  ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7336
7337      /* If this is a stack procedure, allow space for saving FP, RA and
7338	 a condition handler slot if needed.  */
7339      if (alpha_procedure_type == PT_STACK)
7340	sa_size += 2 + cfun->machine->uses_condition_handler;
7341    }
7342  else
7343    {
7344      /* Pad to an even number of slots, i.e. a multiple of 16 bytes.  */
7345      if (sa_size & 1)
7346	sa_size++;
7347    }
7348
7349  return sa_size * 8;
7350}
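
/* A worked example (illustrative only): if an OSF function keeps values
   live across calls in $9 and $10, alpha_sa_mask sets those two bits and,
   since something is saved, the RA bit as well.  That is three 8-byte
   slots; the non-VMS path above pads the count to an even number, so
   sa_size is 4 and the function returns 32 bytes.  */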
7351
7352/* Define the offset between two registers, one to be eliminated,
7353   and the other its replacement, at the start of a routine.  */
7354
7355HOST_WIDE_INT
7356alpha_initial_elimination_offset (unsigned int from,
7357				  unsigned int to ATTRIBUTE_UNUSED)
7358{
7359  HOST_WIDE_INT ret;
7360
7361  ret = alpha_sa_size ();
7362  ret += ALPHA_ROUND (crtl->outgoing_args_size);
7363
7364  switch (from)
7365    {
7366    case FRAME_POINTER_REGNUM:
7367      break;
7368
7369    case ARG_POINTER_REGNUM:
7370      ret += (ALPHA_ROUND (get_frame_size ()
7371			   + crtl->args.pretend_args_size)
7372	      - crtl->args.pretend_args_size);
7373      break;
7374
7375    default:
7376      gcc_unreachable ();
7377    }
7378
7379  return ret;
7380}
7381
7382#if TARGET_ABI_OPEN_VMS
7383
7384/* Worker function for TARGET_CAN_ELIMINATE.  */
7385
7386static bool
7387alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7388{
7389  /* We need the alpha_procedure_type to decide. Evaluate it now.  */
7390  alpha_sa_size ();
7391
7392  switch (alpha_procedure_type)
7393    {
7394    case PT_NULL:
7395      /* NULL procedures have no frame of their own and we only
7396	 know how to resolve from the current stack pointer.  */
7397      return to == STACK_POINTER_REGNUM;
7398
7399    case PT_REGISTER:
7400    case PT_STACK:
7401      /* We always eliminate except to the stack pointer if there is no
7402	 usable frame pointer at hand.  */
7403      return (to != STACK_POINTER_REGNUM
7404	      || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7405    }
7406
7407  gcc_unreachable ();
7408}
7409
7410/* FROM is to be eliminated for TO. Return the offset so that TO+offset
7411   designates the same location as FROM.  */
7412
7413HOST_WIDE_INT
7414alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7415{
7416  /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7417     HARD_FRAME or STACK_PTR.  We need the alpha_procedure_type to decide
7418     on the proper computations and will need the register save area size
7419     in most cases.  */
7420
7421  HOST_WIDE_INT sa_size = alpha_sa_size ();
7422
7423  /* PT_NULL procedures have no frame of their own and we only allow
7424     elimination to the stack pointer, which then coincides with the
7425     argument pointer; the soft frame pointer resolves to it as well.  */
7426
7427  if (alpha_procedure_type == PT_NULL)
7428    return 0;
7429
7430  /* For a PT_STACK procedure the frame layout looks as follows
7431
7432                      -----> decreasing addresses
7433
7434		   <             size rounded up to 16       |   likewise   >
7435     --------------#------------------------------+++--------------+++-------#
7436     incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7437     --------------#---------------------------------------------------------#
7438                                   ^         ^              ^               ^
7439			      ARG_PTR FRAME_PTR HARD_FRAME_PTR       STACK_PTR
7440
7441
7442     PT_REGISTER procedures are similar in that they may have a frame of their
7443     own. They have no regs-sa/pv/outgoing-args area.
7444
7445     We first compute offset to HARD_FRAME_PTR, then add what we need to get
7446     to STACK_PTR if need be.  */
7447
7448  {
7449    HOST_WIDE_INT offset;
7450    HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7451
7452    switch (from)
7453      {
7454      case FRAME_POINTER_REGNUM:
7455	offset = ALPHA_ROUND (sa_size + pv_save_size);
7456	break;
7457      case ARG_POINTER_REGNUM:
7458	offset = (ALPHA_ROUND (sa_size + pv_save_size
7459			       + get_frame_size ()
7460			       + crtl->args.pretend_args_size)
7461		  - crtl->args.pretend_args_size);
7462	break;
7463      default:
7464	gcc_unreachable ();
7465      }
7466
7467    if (to == STACK_POINTER_REGNUM)
7468      offset += ALPHA_ROUND (crtl->outgoing_args_size);
7469
7470    return offset;
7471  }
7472}
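
/* A worked example (illustrative; sizes chosen as multiples of 16 so that
   ALPHA_ROUND, assumed to round up to the 16-byte stack boundary, leaves
   them unchanged): for a PT_STACK procedure with sa_size == 24, a 48-byte
   frame, no pretend args and 48 bytes of outgoing args, pv_save_size is 8,
   so the FRAME_POINTER offset is 24 + 8 == 32 and the ARG_POINTER offset
   is 24 + 8 + 48 == 80 relative to HARD_FRAME_PTR; eliminating to
   STACK_PTR adds another 48 to each.  */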
7473
7474#define COMMON_OBJECT "common_object"
7475
7476static tree
7477common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7478		       tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7479		       bool *no_add_attrs ATTRIBUTE_UNUSED)
7480{
7481  tree decl = *node;
7482  gcc_assert (DECL_P (decl));
7483
7484  DECL_COMMON (decl) = 1;
7485  return NULL_TREE;
7486}
7487
7488static const struct attribute_spec vms_attribute_table[] =
7489{
7490  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7491       affects_type_identity } */
7492  { COMMON_OBJECT,   0, 1, true,  false, false, common_object_handler, false },
7493  { NULL,            0, 0, false, false, false, NULL, false }
7494};
7495
7496void
7497vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7498			       unsigned HOST_WIDE_INT size,
7499			       unsigned int align)
7500{
7501  tree attr = DECL_ATTRIBUTES (decl);
7502  fprintf (file, "%s", COMMON_ASM_OP);
7503  assemble_name (file, name);
7504  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7505  /* ??? Unlike on OSF/1, the alignment factor is not in log units.  */
7506  fprintf (file, ",%u", align / BITS_PER_UNIT);
7507  if (attr)
7508    {
7509      attr = lookup_attribute (COMMON_OBJECT, attr);
7510      if (attr)
7511        fprintf (file, ",%s",
7512		 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7513    }
7514  fputc ('\n', file);
7515}
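
/* For example (a sketch; the exact text depends on COMMON_ASM_OP, assumed
   here to expand to "\t.comm\t"): a 16-byte object `foo' with 64-bit
   alignment and no common_object attribute would be emitted as

	.comm	foo,16,8

   with the alignment factor in bytes rather than log units, as noted
   above.  */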
7516
7517#undef COMMON_OBJECT
7518
7519#endif
7520
7521bool
7522alpha_find_lo_sum_using_gp (rtx insn)
7523{
7524  subrtx_iterator::array_type array;
7525  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7526    {
7527      const_rtx x = *iter;
7528      if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7529	return true;
7530    }
7531  return false;
7532}
7533
7534static int
7535alpha_does_function_need_gp (void)
7536{
7537  rtx_insn *insn;
7538
7539  /* The GP being variable is an OSF ABI thing.  */
7540  if (! TARGET_ABI_OSF)
7541    return 0;
7542
7543  /* We need the gp to load the address of __mcount.  */
7544  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7545    return 1;
7546
7547  /* The code emitted by alpha_output_mi_thunk_osf uses the gp.  */
7548  if (cfun->is_thunk)
7549    return 1;
7550
7551  /* The nonlocal receiver pattern assumes that the gp is valid for
7552     the nested function.  Reasonable because it's almost always set
7553     correctly already.  For the cases where that's wrong, make sure
7554     the nested function loads its gp on entry.  */
7555  if (crtl->has_nonlocal_goto)
7556    return 1;
7557
7558  /* If we need a GP (we have an LDSYM insn or a CALL_INSN), load it first.
7559     Even if we are a static function, we still need to do this in case
7560     our address is taken and passed to something like qsort.  */
7561
7562  push_topmost_sequence ();
7563  insn = get_insns ();
7564  pop_topmost_sequence ();
7565
7566  for (; insn; insn = NEXT_INSN (insn))
7567    if (NONDEBUG_INSN_P (insn)
7568	&& GET_CODE (PATTERN (insn)) != USE
7569	&& GET_CODE (PATTERN (insn)) != CLOBBER
7570	&& get_attr_usegp (insn))
7571      return 1;
7572
7573  return 0;
7574}
7575
7576
7577/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7578   sequences.  */
7579
7580static rtx_insn *
7581set_frame_related_p (void)
7582{
7583  rtx_insn *seq = get_insns ();
7584  rtx_insn *insn;
7585
7586  end_sequence ();
7587
7588  if (!seq)
7589    return NULL;
7590
7591  if (INSN_P (seq))
7592    {
7593      insn = seq;
7594      while (insn != NULL_RTX)
7595	{
7596	  RTX_FRAME_RELATED_P (insn) = 1;
7597	  insn = NEXT_INSN (insn);
7598	}
7599      seq = emit_insn (seq);
7600    }
7601  else
7602    {
7603      seq = emit_insn (seq);
7604      RTX_FRAME_RELATED_P (seq) = 1;
7605    }
7606  return seq;
7607}
7608
7609#define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
7610
7611/* Generates a store with the proper unwind info attached.  VALUE is
7612   stored at BASE_REG+BASE_OFS.  If FRAME_BIAS is nonzero, then BASE_REG
7613   contains SP+FRAME_BIAS, and that is the unwind info that should be
7614   generated.  If FRAME_REG != VALUE, then VALUE is being stored on
7615   behalf of FRAME_REG, and FRAME_REG should be present in the unwind.  */
7616
7617static void
7618emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7619		    HOST_WIDE_INT base_ofs, rtx frame_reg)
7620{
7621  rtx addr, mem;
7622  rtx_insn *insn;
7623
7624  addr = plus_constant (Pmode, base_reg, base_ofs);
7625  mem = gen_frame_mem (DImode, addr);
7626
7627  insn = emit_move_insn (mem, value);
7628  RTX_FRAME_RELATED_P (insn) = 1;
7629
7630  if (frame_bias || value != frame_reg)
7631    {
7632      if (frame_bias)
7633	{
7634	  addr = plus_constant (Pmode, stack_pointer_rtx,
7635			        frame_bias + base_ofs);
7636	  mem = gen_rtx_MEM (DImode, addr);
7637	}
7638
7639      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7640		    gen_rtx_SET (mem, frame_reg));
7641    }
7642}
7643
7644static void
7645emit_frame_store (unsigned int regno, rtx base_reg,
7646		  HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7647{
7648  rtx reg = gen_rtx_REG (DImode, regno);
7649  emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7650}
7651
7652/* Compute the frame size.  SIZE is the size of the "naked" frame
7653   and SA_SIZE is the size of the register save area.  */
7654
7655static HOST_WIDE_INT
7656compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7657{
7658  if (TARGET_ABI_OPEN_VMS)
7659    return ALPHA_ROUND (sa_size
7660			+ (alpha_procedure_type == PT_STACK ? 8 : 0)
7661			+ size
7662			+ crtl->args.pretend_args_size);
7663  else
7664    return ALPHA_ROUND (crtl->outgoing_args_size)
7665	   + sa_size
7666	   + ALPHA_ROUND (size
7667			  + crtl->args.pretend_args_size);
7668}
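
/* A worked example (illustrative): for an OSF function with 48 bytes of
   outgoing arguments, sa_size == 32 and a 48-byte "naked" frame with no
   pretend args, each component is already a multiple of 16, so the
   rounding is a no-op (assuming ALPHA_ROUND rounds up to the 16-byte
   stack alignment) and frame_size is 48 + 32 + 48 == 128 bytes.  */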
7669
7670/* Write function prologue.  */
7671
7672/* On VMS we have two kinds of functions:
7673
7674   - stack frame (PROC_STACK)
7675	these are 'normal' functions with local variables that
7676	call other functions
7677   - register frame (PROC_REGISTER)
7678	keeps all data in registers, needs no stack
7679
7680   We must pass this to the assembler so it can generate the
7681   proper pdsc (procedure descriptor).  This is done with the
7682   '.pdesc' directive.
7683
7684   On non-VMS targets, we don't really differentiate between the two,
7685   as we can simply allocate stack without saving registers.  */
7686
7687void
7688alpha_expand_prologue (void)
7689{
7690  /* Registers to save.  */
7691  unsigned long imask = 0;
7692  unsigned long fmask = 0;
7693  /* Stack space needed for pushing registers clobbered by us.  */
7694  HOST_WIDE_INT sa_size, sa_bias;
7695  /* Complete stack size needed.  */
7696  HOST_WIDE_INT frame_size;
7697  /* Probed stack size; it additionally includes the size of
7698     the "reserve region" if any.  */
7699  HOST_WIDE_INT probed_size;
7700  /* Offset from base reg to register save area.  */
7701  HOST_WIDE_INT reg_offset;
7702  rtx sa_reg;
7703  int i;
7704
7705  sa_size = alpha_sa_size ();
7706  frame_size = compute_frame_size (get_frame_size (), sa_size);
7707
7708  if (flag_stack_usage_info)
7709    current_function_static_stack_size = frame_size;
7710
7711  if (TARGET_ABI_OPEN_VMS)
7712    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7713  else
7714    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7715
7716  alpha_sa_mask (&imask, &fmask);
7717
7718  /* Emit an insn to reload GP, if needed.  */
7719  if (TARGET_ABI_OSF)
7720    {
7721      alpha_function_needs_gp = alpha_does_function_need_gp ();
7722      if (alpha_function_needs_gp)
7723	emit_insn (gen_prologue_ldgp ());
7724    }
7725
7726  /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7727     the call to mcount ourselves, rather than having the linker do it
7728     magically in response to -pg.  Since _mcount has special linkage,
7729     don't represent the call as a call.  */
7730  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7731    emit_insn (gen_prologue_mcount ());
7732
7733  /* Adjust the stack by the frame size.  If the frame size is > 4096
7734     bytes, we need to be sure we probe somewhere in the first and last
7735     4096 bytes (we can probably get away without the latter test) and
7736     every 8192 bytes in between.  If the frame size is > 32768, we
7737     do this in a loop.  Otherwise, we generate the explicit probe
7738     instructions.
7739
7740     Note that we are only allowed to adjust sp once in the prologue.  */
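
  /* A worked example (illustrative): with frame_size == 20000 and no
     -fstack-check, the small-frame path below emits probes at sp-4096 and
     sp-12288; if no registers are being saved, one further probe at
     sp-20000 precedes the single sp adjustment.  When registers are
     saved, that last probe is omitted, since the register stores
     themselves touch the newly allocated area.  */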
7741
7742  probed_size = frame_size;
7743  if (flag_stack_check)
7744    probed_size += STACK_CHECK_PROTECT;
7745
7746  if (probed_size <= 32768)
7747    {
7748      if (probed_size > 4096)
7749	{
7750	  int probed;
7751
7752	  for (probed = 4096; probed < probed_size; probed += 8192)
7753	    emit_insn (gen_stack_probe_internal (GEN_INT (-probed)));
7754
7755	  /* We only have to do this probe if we aren't saving registers or
7756	     if we are probing beyond the frame because of -fstack-check.  */
7757	  if ((sa_size == 0 && probed_size > probed - 4096)
7758	      || flag_stack_check)
7759	    emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size)));
7760	}
7761
7762      if (frame_size != 0)
7763	FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7764				    GEN_INT (-frame_size))));
7765    }
7766  else
7767    {
7768      /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7769	 number of 8192 byte blocks to probe.  We then probe each block
7770	 in the loop and then set SP to the proper location.  If the
7771	 amount remaining is > 4096, we have to do one more probe if we
7772	 are not saving any registers or if we are probing beyond the
7773	 frame because of -fstack-check.  */
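
      /* A worked example (illustrative, assuming the probe loop pattern
	 steps PTR down by 8192 per iteration, as its use here implies):
	 probed_size == 100000 gives blocks == 12 and leftover == 5792;
	 12 * 8192 + 5792 == probed_size + 4096, so the final adjustment
	 by -leftover leaves sp exactly probed_size bytes lower.  */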
7774
7775      HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7776      HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7777      rtx ptr = gen_rtx_REG (DImode, 22);
7778      rtx count = gen_rtx_REG (DImode, 23);
7779      rtx seq;
7780
7781      emit_move_insn (count, GEN_INT (blocks));
7782      emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7783
7784      /* Because of the difficulty in emitting a new basic block this
7785	 late in the compilation, generate the loop as a single insn.  */
7786      emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7787
7788      if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7789	{
7790	  rtx last = gen_rtx_MEM (DImode,
7791				  plus_constant (Pmode, ptr, -leftover));
7792	  MEM_VOLATILE_P (last) = 1;
7793	  emit_move_insn (last, const0_rtx);
7794	}
7795
7796      if (flag_stack_check)
7797	{
7798	  /* If -fstack-check is specified we have to load the entire
7799	     constant into a register and subtract from the sp in one go,
7800	     because the probed stack size is not equal to the frame size.  */
7801	  HOST_WIDE_INT lo, hi;
7802	  lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7803	  hi = frame_size - lo;
7804
7805	  emit_move_insn (ptr, GEN_INT (hi));
7806	  emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7807	  seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7808				       ptr));
7809	}
7810      else
7811	{
7812	  seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7813				       GEN_INT (-leftover)));
7814	}
7815
7816      /* This alternative is special, because the DWARF code cannot
7817         possibly intuit through the loop above.  So we invent this
7818         note that it looks at instead.  */
7819      RTX_FRAME_RELATED_P (seq) = 1;
7820      add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7821		    gen_rtx_SET (stack_pointer_rtx,
7822				 plus_constant (Pmode, stack_pointer_rtx,
7823						-frame_size)));
7824    }
7825
7826  /* Cope with very large offsets to the register save area.  */
7827  sa_bias = 0;
7828  sa_reg = stack_pointer_rtx;
7829  if (reg_offset + sa_size > 0x8000)
7830    {
7831      int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7832      rtx sa_bias_rtx;
7833
7834      if (low + sa_size <= 0x8000)
7835	sa_bias = reg_offset - low, reg_offset = low;
7836      else
7837	sa_bias = reg_offset, reg_offset = 0;
7838
7839      sa_reg = gen_rtx_REG (DImode, 24);
7840      sa_bias_rtx = GEN_INT (sa_bias);
7841
7842      if (add_operand (sa_bias_rtx, DImode))
7843	emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7844      else
7845	{
7846	  emit_move_insn (sa_reg, sa_bias_rtx);
7847	  emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7848	}
7849    }
7850
7851  /* Save regs in stack order, beginning with the VMS PV.  */
7852  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7853    emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7854
7855  /* Save register RA next.  */
7856  if (imask & (1UL << REG_RA))
7857    {
7858      emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7859      imask &= ~(1UL << REG_RA);
7860      reg_offset += 8;
7861    }
7862
7863  /* Now save any other registers required to be saved.  */
7864  for (i = 0; i < 31; i++)
7865    if (imask & (1UL << i))
7866      {
7867	emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7868	reg_offset += 8;
7869      }
7870
7871  for (i = 0; i < 31; i++)
7872    if (fmask & (1UL << i))
7873      {
7874	emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7875	reg_offset += 8;
7876      }
7877
7878  if (TARGET_ABI_OPEN_VMS)
7879    {
7880      /* Register frame procedures save the fp.  */
7881      if (alpha_procedure_type == PT_REGISTER)
7882	{
7883	  rtx_insn *insn =
7884	    emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7885			    hard_frame_pointer_rtx);
7886	  add_reg_note (insn, REG_CFA_REGISTER, NULL);
7887	  RTX_FRAME_RELATED_P (insn) = 1;
7888	}
7889
7890      if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7891	emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7892				    gen_rtx_REG (DImode, REG_PV)));
7893
7894      if (alpha_procedure_type != PT_NULL
7895	  && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7896	FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7897
7898      /* If we have to allocate space for outgoing args, do it now.  */
7899      if (crtl->outgoing_args_size != 0)
7900	{
7901	  rtx_insn *seq
7902	    = emit_move_insn (stack_pointer_rtx,
7903			      plus_constant
7904			      (Pmode, hard_frame_pointer_rtx,
7905			       - (ALPHA_ROUND
7906				  (crtl->outgoing_args_size))));
7907
7908	  /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7909	     if ! frame_pointer_needed. Setting the bit will change the CFA
7910	     computation rule to use sp again, which would be wrong if we had
7911	     frame_pointer_needed, as this means sp might move unpredictably
7912	     later on.
7913
7914	     Also, note that
7915	       frame_pointer_needed
7916	       => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7917	     and
7918	       crtl->outgoing_args_size != 0
7919	       => alpha_procedure_type != PT_NULL,
7920
7921	     so when we are not setting the bit here, we are guaranteed to
7922	     have emitted an FRP frame pointer update just before.  */
7923	  RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7924	}
7925    }
7926  else
7927    {
7928      /* If we need a frame pointer, set it from the stack pointer.  */
7929      if (frame_pointer_needed)
7930	{
7931	  if (TARGET_CAN_FAULT_IN_PROLOGUE)
7932	    FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7933	  else
7934	    /* This must always be the last instruction in the
7935	       prologue, thus we emit a special move + clobber.  */
7936	      FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7937				           stack_pointer_rtx, sa_reg)));
7938	}
7939    }
7940
7941  /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7942     the prologue, for exception handling reasons, we cannot do this for
7943     any insn that might fault.  We could prevent this for mems with a
7944     (clobber:BLK (scratch)), but this doesn't work for fp insns.  So we
7945     have to prevent all such scheduling with a blockage.
7946
7947     Linux, on the other hand, never bothered to implement OSF/1's
7948     exception handling, and so doesn't care about such things.  Anyone
7949     planning to use dwarf2 frame-unwind info can also omit the blockage.  */
7950
7951  if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7952    emit_insn (gen_blockage ());
7953}
7954
7955/* Count the number of .file directives, so that .loc is up to date.  */
7956int num_source_filenames = 0;
7957
7958/* Output the textual info surrounding the prologue.  */
7959
7960void
7961alpha_start_function (FILE *file, const char *fnname,
7962		      tree decl ATTRIBUTE_UNUSED)
7963{
7964  unsigned long imask = 0;
7965  unsigned long fmask = 0;
7966  /* Stack space needed for pushing registers clobbered by us.  */
7967  HOST_WIDE_INT sa_size;
7968  /* Complete stack size needed.  */
7969  unsigned HOST_WIDE_INT frame_size;
7970  /* The maximum debuggable frame size.  */
7971  unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
7972  /* Offset from base reg to register save area.  */
7973  HOST_WIDE_INT reg_offset;
7974  char *entry_label = (char *) alloca (strlen (fnname) + 6);
7975  char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7976  int i;
7977
7978#if TARGET_ABI_OPEN_VMS
7979  vms_start_function (fnname);
7980#endif
7981
7982  alpha_fnname = fnname;
7983  sa_size = alpha_sa_size ();
7984  frame_size = compute_frame_size (get_frame_size (), sa_size);
7985
7986  if (TARGET_ABI_OPEN_VMS)
7987    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7988  else
7989    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7990
7991  alpha_sa_mask (&imask, &fmask);
7992
7993  /* Issue function start and label.  */
7994  if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
7995    {
7996      fputs ("\t.ent ", file);
7997      assemble_name (file, fnname);
7998      putc ('\n', file);
7999
8000      /* If the function needs GP, we'll write the "..ng" label there.
8001	 Otherwise, do it here.  */
8002      if (TARGET_ABI_OSF
8003          && ! alpha_function_needs_gp
8004	  && ! cfun->is_thunk)
8005	{
8006	  putc ('$', file);
8007	  assemble_name (file, fnname);
8008	  fputs ("..ng:\n", file);
8009	}
8010    }
8011  /* Nested functions on VMS that are potentially called via trampoline
8012     get a special transfer entry point that loads the called function's
8013     procedure descriptor and static chain.  */
8014   if (TARGET_ABI_OPEN_VMS
8015       && !TREE_PUBLIC (decl)
8016       && DECL_CONTEXT (decl)
8017       && !TYPE_P (DECL_CONTEXT (decl))
8018       && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8019     {
8020	strcpy (tramp_label, fnname);
8021	strcat (tramp_label, "..tr");
8022	ASM_OUTPUT_LABEL (file, tramp_label);
8023	fprintf (file, "\tldq $1,24($27)\n");
8024	fprintf (file, "\tldq $27,16($27)\n");
8025     }
8026
8027  strcpy (entry_label, fnname);
8028  if (TARGET_ABI_OPEN_VMS)
8029    strcat (entry_label, "..en");
8030
8031  ASM_OUTPUT_LABEL (file, entry_label);
8032  inside_function = TRUE;
8033
8034  if (TARGET_ABI_OPEN_VMS)
8035    fprintf (file, "\t.base $%d\n", vms_base_regno);
8036
8037  if (TARGET_ABI_OSF
8038      && TARGET_IEEE_CONFORMANT
8039      && !flag_inhibit_size_directive)
8040    {
8041      /* Set flags in procedure descriptor to request IEEE-conformant
8042	 math-library routines.  The value we set it to is PDSC_EXC_IEEE
8043	 (/usr/include/pdsc.h).  */
8044      fputs ("\t.eflag 48\n", file);
8045    }
8046
8047  /* Set up offsets to alpha virtual arg/local debugging pointer.  */
8048  alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8049  alpha_arg_offset = -frame_size + 48;
8050
8051  /* Describe our frame.  If the frame size does not fit in a signed
8052     32-bit integer, print it as zero to avoid an assembler error.  We won't
8053     be properly describing such a frame, but that's the best we can do.  */
8054  if (TARGET_ABI_OPEN_VMS)
8055    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8056	     HOST_WIDE_INT_PRINT_DEC "\n",
8057	     vms_unwind_regno,
8058	     frame_size >= (1UL << 31) ? 0 : frame_size,
8059	     reg_offset);
8060  else if (!flag_inhibit_size_directive)
8061    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8062	     (frame_pointer_needed
8063	      ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8064	     frame_size >= max_frame_size ? 0 : frame_size,
8065	     crtl->args.pretend_args_size);
8066
8067  /* Describe which registers were spilled.  */
8068  if (TARGET_ABI_OPEN_VMS)
8069    {
8070      if (imask)
8071        /* ??? Does VMS care if the mask contains RA?  The old code didn't
8072           set it, so I don't set it here.  */
8073	fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8074      if (fmask)
8075	fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8076      if (alpha_procedure_type == PT_REGISTER)
8077	fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8078    }
8079  else if (!flag_inhibit_size_directive)
8080    {
8081      if (imask)
8082	{
8083	  fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8084		   frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8085
8086	  for (i = 0; i < 32; ++i)
8087	    if (imask & (1UL << i))
8088	      reg_offset += 8;
8089	}
8090
8091      if (fmask)
8092	fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8093		 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8094    }
8095
8096#if TARGET_ABI_OPEN_VMS
8097  /* If a user condition handler has been installed at some point, emit
8098     the procedure descriptor bits to point the Condition Handling Facility
8099     at the indirection wrapper, and state the fp offset at which the user
8100     handler may be found.  */
8101  if (cfun->machine->uses_condition_handler)
8102    {
8103      fprintf (file, "\t.handler __gcc_shell_handler\n");
8104      fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8105    }
8106
8107#ifdef TARGET_VMS_CRASH_DEBUG
8108  /* Support of minimal traceback info.  */
8109  switch_to_section (readonly_data_section);
8110  fprintf (file, "\t.align 3\n");
8111  assemble_name (file, fnname); fputs ("..na:\n", file);
8112  fputs ("\t.ascii \"", file);
8113  assemble_name (file, fnname);
8114  fputs ("\\0\"\n", file);
8115  switch_to_section (text_section);
8116#endif
8117#endif /* TARGET_ABI_OPEN_VMS */
8118}
8119
8120/* Emit the .prologue note at the scheduled end of the prologue.  */
8121
8122static void
8123alpha_output_function_end_prologue (FILE *file)
8124{
8125  if (TARGET_ABI_OPEN_VMS)
8126    fputs ("\t.prologue\n", file);
8127  else if (!flag_inhibit_size_directive)
8128    fprintf (file, "\t.prologue %d\n",
8129	     alpha_function_needs_gp || cfun->is_thunk);
8130}
8131
8132/* Write function epilogue.  */
8133
8134void
8135alpha_expand_epilogue (void)
8136{
8137  /* Registers to save.  */
8138  unsigned long imask = 0;
8139  unsigned long fmask = 0;
8140  /* Stack space needed for pushing registers clobbered by us.  */
8141  HOST_WIDE_INT sa_size;
8142  /* Complete stack size needed.  */
8143  HOST_WIDE_INT frame_size;
8144  /* Offset from base reg to register save area.  */
8145  HOST_WIDE_INT reg_offset;
8146  int fp_is_frame_pointer, fp_offset;
8147  rtx sa_reg, sa_reg_exp = NULL;
8148  rtx sp_adj1, sp_adj2, mem, reg, insn;
8149  rtx eh_ofs;
8150  rtx cfa_restores = NULL_RTX;
8151  int i;
8152
8153  sa_size = alpha_sa_size ();
8154  frame_size = compute_frame_size (get_frame_size (), sa_size);
8155
8156  if (TARGET_ABI_OPEN_VMS)
8157    {
8158       if (alpha_procedure_type == PT_STACK)
8159          reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8160       else
8161          reg_offset = 0;
8162    }
8163  else
8164    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8165
8166  alpha_sa_mask (&imask, &fmask);
8167
8168  fp_is_frame_pointer
8169    = (TARGET_ABI_OPEN_VMS
8170       ? alpha_procedure_type == PT_STACK
8171       : frame_pointer_needed);
8172  fp_offset = 0;
8173  sa_reg = stack_pointer_rtx;
8174
8175  if (crtl->calls_eh_return)
8176    eh_ofs = EH_RETURN_STACKADJ_RTX;
8177  else
8178    eh_ofs = NULL_RTX;
8179
8180  if (sa_size)
8181    {
8182      /* If we have a frame pointer, restore SP from it.  */
8183      if (TARGET_ABI_OPEN_VMS
8184	  ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8185	  : frame_pointer_needed)
8186	emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8187
8188      /* Cope with very large offsets to the register save area.  */
8189      if (reg_offset + sa_size > 0x8000)
8190	{
8191	  int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8192	  HOST_WIDE_INT bias;
8193
8194	  if (low + sa_size <= 0x8000)
8195	    bias = reg_offset - low, reg_offset = low;
8196	  else
8197	    bias = reg_offset, reg_offset = 0;
8198
8199	  sa_reg = gen_rtx_REG (DImode, 22);
8200	  sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8201
8202	  emit_move_insn (sa_reg, sa_reg_exp);
8203	}
8204
8205      /* Restore registers in order, excepting a true frame pointer.  */
8206
8207      mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8208      reg = gen_rtx_REG (DImode, REG_RA);
8209      emit_move_insn (reg, mem);
8210      cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8211
8212      reg_offset += 8;
8213      imask &= ~(1UL << REG_RA);
8214
8215      for (i = 0; i < 31; ++i)
8216	if (imask & (1UL << i))
8217	  {
8218	    if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8219	      fp_offset = reg_offset;
8220	    else
8221	      {
8222		mem = gen_frame_mem (DImode,
8223				     plus_constant (Pmode, sa_reg,
8224						    reg_offset));
8225		reg = gen_rtx_REG (DImode, i);
8226		emit_move_insn (reg, mem);
8227		cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8228					       cfa_restores);
8229	      }
8230	    reg_offset += 8;
8231	  }
8232
8233      for (i = 0; i < 31; ++i)
8234	if (fmask & (1UL << i))
8235	  {
8236	    mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8237						        reg_offset));
8238	    reg = gen_rtx_REG (DFmode, i+32);
8239	    emit_move_insn (reg, mem);
8240	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8241	    reg_offset += 8;
8242	  }
8243    }
8244
8245  if (frame_size || eh_ofs)
8246    {
8247      sp_adj1 = stack_pointer_rtx;
8248
8249      if (eh_ofs)
8250	{
8251	  sp_adj1 = gen_rtx_REG (DImode, 23);
8252	  emit_move_insn (sp_adj1,
8253			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8254	}
8255
8256      /* If the stack size is large, begin computation into a temporary
8257	 register so as not to interfere with a potential fp restore,
8258	 which must be consecutive with an SP restore.  */
8259      if (frame_size < 32768 && !cfun->calls_alloca)
8260	sp_adj2 = GEN_INT (frame_size);
8261      else if (frame_size < 0x40007fffL)
8262	{
8263	  int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8264
8265	  sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8266	  if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8267	    sp_adj1 = sa_reg;
8268	  else
8269	    {
8270	      sp_adj1 = gen_rtx_REG (DImode, 23);
8271	      emit_move_insn (sp_adj1, sp_adj2);
8272	    }
8273	  sp_adj2 = GEN_INT (low);
8274	}
8275      else
8276	{
8277	  rtx tmp = gen_rtx_REG (DImode, 23);
8278	  sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8279	  if (!sp_adj2)
8280	    {
8281	      /* We can't drop new constants to memory this late in compilation,
8282		 as far as we know, so build the value up in pieces.  */
8283	      sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8284	      gcc_assert (sp_adj2);
8285	    }
8286	}
8287
8288      /* From now on, things must be in order.  So emit blockages.  */
8289
8290      /* Restore the frame pointer.  */
8291      if (fp_is_frame_pointer)
8292	{
8293	  emit_insn (gen_blockage ());
8294	  mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8295						      fp_offset));
8296	  emit_move_insn (hard_frame_pointer_rtx, mem);
8297	  cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8298					 hard_frame_pointer_rtx, cfa_restores);
8299	}
8300      else if (TARGET_ABI_OPEN_VMS)
8301	{
8302	  emit_insn (gen_blockage ());
8303	  emit_move_insn (hard_frame_pointer_rtx,
8304			  gen_rtx_REG (DImode, vms_save_fp_regno));
8305	  cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8306					 hard_frame_pointer_rtx, cfa_restores);
8307	}
8308
8309      /* Restore the stack pointer.  */
8310      emit_insn (gen_blockage ());
8311      if (sp_adj2 == const0_rtx)
8312	insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8313      else
8314	insn = emit_move_insn (stack_pointer_rtx,
8315			       gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8316      REG_NOTES (insn) = cfa_restores;
8317      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8318      RTX_FRAME_RELATED_P (insn) = 1;
8319    }
8320  else
8321    {
8322      gcc_assert (cfa_restores == NULL);
8323
8324      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8325        {
8326          emit_insn (gen_blockage ());
8327          insn = emit_move_insn (hard_frame_pointer_rtx,
8328				 gen_rtx_REG (DImode, vms_save_fp_regno));
8329	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8330	  RTX_FRAME_RELATED_P (insn) = 1;
8331        }
8332    }
8333}
8334
8335/* Output the rest of the textual info surrounding the epilogue.  */
8336
8337void
8338alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8339{
8340  rtx_insn *insn;
8341
8342  /* We output a nop after noreturn calls at the very end of the function to
8343     ensure that the return address always remains in the caller's code range,
8344     as not doing so might confuse unwinding engines.  */
8345  insn = get_last_insn ();
8346  if (!INSN_P (insn))
8347    insn = prev_active_insn (insn);
8348  if (insn && CALL_P (insn))
8349    output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8350
8351#if TARGET_ABI_OPEN_VMS
8352  /* Write the linkage entries.  */
8353  alpha_write_linkage (file, fnname);
8354#endif
8355
8356  /* End the function.  */
8357  if (TARGET_ABI_OPEN_VMS
8358      || !flag_inhibit_size_directive)
8359    {
8360      fputs ("\t.end ", file);
8361      assemble_name (file, fnname);
8362      putc ('\n', file);
8363    }
8364  inside_function = FALSE;
8365}
8366
8367#if TARGET_ABI_OSF
8368/* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8369
8370   In order to avoid the hordes of differences between generated code
8371   with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8372   lots of code loading up large constants, generate rtl and emit it
8373   instead of going straight to text.
8374
8375   Not sure why this idea hasn't been explored before...  */
8376
8377static void
8378alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8379			   HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8380			   tree function)
8381{
8382  HOST_WIDE_INT hi, lo;
8383  rtx this_rtx, funexp;
8384  rtx_insn *insn;
8385
8386  /* We always require a valid GP.  */
8387  emit_insn (gen_prologue_ldgp ());
8388  emit_note (NOTE_INSN_PROLOGUE_END);
8389
8390  /* Find the "this" pointer.  If the function returns a structure,
8391     the structure return pointer is in $16.  */
8392  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8393    this_rtx = gen_rtx_REG (Pmode, 17);
8394  else
8395    this_rtx = gen_rtx_REG (Pmode, 16);
8396
8397  /* Add DELTA.  When possible we use ldah+lda.  Otherwise load the
8398     entire constant for the add.  */
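  /* A worked example (illustrative): DELTA == 0x18000 splits into
     lo == -0x8000 and hi == 0x20000, so hi + lo == delta and the two
     adds below (an ldah-style high part plus an lda-style low part)
     suffice.  A delta such as 0x100000000 gives hi + lo != delta and
     falls through to the full constant load.  */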
8399  lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8400  hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8401  if (hi + lo == delta)
8402    {
8403      if (hi)
8404	emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8405      if (lo)
8406	emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8407    }
8408  else
8409    {
8410      rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8411      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8412    }
8413
8414  /* Add a delta stored in the vtable at VCALL_OFFSET.  */
8415  if (vcall_offset)
8416    {
8417      rtx tmp, tmp2;
8418
8419      tmp = gen_rtx_REG (Pmode, 0);
8420      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8421
8422      lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8423      hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8424      if (hi + lo == vcall_offset)
8425	{
8426	  if (hi)
8427	    emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8428	}
8429      else
8430	{
8431	  tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8432					    vcall_offset);
8433          emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8434	  lo = 0;
8435	}
8436      if (lo)
8437	tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8438      else
8439	tmp2 = tmp;
8440      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8441
8442      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8443    }
8444
8445  /* Generate a tail call to the target function.  */
8446  if (! TREE_USED (function))
8447    {
8448      assemble_external (function);
8449      TREE_USED (function) = 1;
8450    }
8451  funexp = XEXP (DECL_RTL (function), 0);
8452  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8453  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8454  SIBLING_CALL_P (insn) = 1;
8455
8456  /* Run just enough of rest_of_compilation to get the insns emitted.
8457     There's not really enough bulk here to make other passes such as
8458     instruction scheduling worthwhile.  Note that use_thunk calls
8459     assemble_start_function and assemble_end_function.  */
8460  insn = get_insns ();
8461  shorten_branches (insn);
8462  final_start_function (insn, file, 1);
8463  final (insn, file, 1);
8464  final_end_function ();
8465}
8466#endif /* TARGET_ABI_OSF */
8467
8468/* Debugging support.  */
8469
8470#include "gstab.h"
8471
8472/* Name of the file containing the current function.  */
8473
8474static const char *current_function_file = "";
8475
8476/* Offsets to alpha virtual arg/local debugging pointers.  */
8477
8478long alpha_arg_offset;
8479long alpha_auto_offset;
8480
8481/* Emit a new filename to a stream.  */
8482
8483void
8484alpha_output_filename (FILE *stream, const char *name)
8485{
8486  static int first_time = TRUE;
8487
8488  if (first_time)
8489    {
8490      first_time = FALSE;
8491      ++num_source_filenames;
8492      current_function_file = name;
8493      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8494      output_quoted_string (stream, name);
8495      fprintf (stream, "\n");
8496    }
8497
8498  else if (name != current_function_file
8499	   && strcmp (name, current_function_file) != 0)
8500    {
8501      ++num_source_filenames;
8502      current_function_file = name;
8503      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8504
8505      output_quoted_string (stream, name);
8506      fprintf (stream, "\n");
8507    }
8508}
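
/* For example (a sketch), the first call for a file "foo.c" emits
   something like

	.file	1 "foo.c"

   and later calls emit a new .file directive only when the file name
   actually changes.  */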
8509
8510/* Structure to show the current status of registers and memory.  */
8511
8512struct shadow_summary
8513{
8514  struct {
8515    unsigned int i     : 31;	/* Mask of int regs */
8516    unsigned int fp    : 31;	/* Mask of fp regs */
8517    unsigned int mem   :  1;	/* mem == imem | fpmem */
8518  } used, defd;
8519};
8520
8521/* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8522   to the summary structure.  SET is nonzero if the insn is setting the
8523   object, otherwise zero.  */
8524
8525static void
8526summarize_insn (rtx x, struct shadow_summary *sum, int set)
8527{
8528  const char *format_ptr;
8529  int i, j;
8530
8531  if (x == 0)
8532    return;
8533
8534  switch (GET_CODE (x))
8535    {
8536      /* ??? Note that this case would be incorrect if the Alpha had a
8537	 ZERO_EXTRACT in SET_DEST.  */
8538    case SET:
8539      summarize_insn (SET_SRC (x), sum, 0);
8540      summarize_insn (SET_DEST (x), sum, 1);
8541      break;
8542
8543    case CLOBBER:
8544      summarize_insn (XEXP (x, 0), sum, 1);
8545      break;
8546
8547    case USE:
8548      summarize_insn (XEXP (x, 0), sum, 0);
8549      break;
8550
8551    case ASM_OPERANDS:
8552      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8553	summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8554      break;
8555
8556    case PARALLEL:
8557      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8558	summarize_insn (XVECEXP (x, 0, i), sum, 0);
8559      break;
8560
8561    case SUBREG:
8562      summarize_insn (SUBREG_REG (x), sum, 0);
8563      break;
8564
8565    case REG:
8566      {
8567	int regno = REGNO (x);
8568	unsigned long mask = ((unsigned long) 1) << (regno % 32);
8569
8570	if (regno == 31 || regno == 63)
8571	  break;
8572
8573	if (set)
8574	  {
8575	    if (regno < 32)
8576	      sum->defd.i |= mask;
8577	    else
8578	      sum->defd.fp |= mask;
8579	  }
8580	else
8581	  {
8582	    if (regno < 32)
8583	      sum->used.i  |= mask;
8584	    else
8585	      sum->used.fp |= mask;
8586	  }
8587	}
8588      break;
8589
8590    case MEM:
8591      if (set)
8592	sum->defd.mem = 1;
8593      else
8594	sum->used.mem = 1;
8595
8596      /* Find the regs used in memory address computation: */
8597      summarize_insn (XEXP (x, 0), sum, 0);
8598      break;
8599
8600    case CONST_INT:   case CONST_WIDE_INT:  case CONST_DOUBLE:
8601    case SYMBOL_REF:  case LABEL_REF:       case CONST:
8602    case SCRATCH:     case ASM_INPUT:
8603      break;
8604
8605      /* Handle common unary and binary ops for efficiency.  */
8606    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
8607    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
8608    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
8609    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
8610    case NE:       case EQ:      case GE:      case GT:        case LE:
8611    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
8612      summarize_insn (XEXP (x, 0), sum, 0);
8613      summarize_insn (XEXP (x, 1), sum, 0);
8614      break;
8615
8616    case NEG:  case NOT:  case SIGN_EXTEND:  case ZERO_EXTEND:
8617    case TRUNCATE:  case FLOAT_EXTEND:  case FLOAT_TRUNCATE:  case FLOAT:
8618    case FIX:  case UNSIGNED_FLOAT:  case UNSIGNED_FIX:  case ABS:
8619    case SQRT:  case FFS:
8620      summarize_insn (XEXP (x, 0), sum, 0);
8621      break;
8622
8623    default:
8624      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8625      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8626	switch (format_ptr[i])
8627	  {
8628	  case 'e':
8629	    summarize_insn (XEXP (x, i), sum, 0);
8630	    break;
8631
8632	  case 'E':
8633	    for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8634	      summarize_insn (XVECEXP (x, i, j), sum, 0);
8635	    break;
8636
8637	  case 'i':
8638	    break;
8639
8640	  default:
8641	    gcc_unreachable ();
8642	  }
8643    }
8644}
8645
8646/* Ensure a sufficient number of `trapb' insns are in the code when
8647   the user requests code with a trap precision of functions or
8648   instructions.
8649
8650   In naive mode, when the user requests a trap-precision of
8651   "instruction", a trapb is needed after every instruction that may
8652   generate a trap.  This ensures that the code is resumption safe but
8653   it is also slow.
8654
8655   When optimizations are turned on, we delay issuing a trapb as long
8656   as possible.  In this context, a trap shadow is the sequence of
8657   instructions that starts with a (potentially) trap generating
8658   instruction and extends to the next trapb or call_pal instruction
8659   (but GCC never generates call_pal by itself).  We can delay (and
8660   therefore sometimes omit) a trapb subject to the following
8661   conditions:
8662
8663   (a) On entry to the trap shadow, if any Alpha register or memory
8664   location contains a value that is used as an operand value by some
8665   instruction in the trap shadow (live on entry), then no instruction
8666   in the trap shadow may modify the register or memory location.
8667
8668   (b) Within the trap shadow, the computation of the base register
8669   for a memory load or store instruction may not involve using the
8670   result of an instruction that might generate an UNPREDICTABLE
8671   result.
8672
8673   (c) Within the trap shadow, no register may be used more than once
8674   as a destination register.  (This is to make life easier for the
8675   trap-handler.)
8676
8677   (d) The trap shadow may not include any branch instructions.  */
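
/* An illustrative sketch of rule (c): if a trapping addt that writes $f3
   opens a shadow, an independent multiply writing $f6 may stay inside it,
   but a later instruction that also writes $f3 re-uses a destination
   register within the shadow, so a trapb is emitted just before it (this
   is the sum.defd vs. shadow.defd check in the pass below).  */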
8678
8679static void
8680alpha_handle_trap_shadows (void)
8681{
8682  struct shadow_summary shadow;
8683  int trap_pending, exception_nesting;
8684  rtx_insn *i, *n;
8685
8686  trap_pending = 0;
8687  exception_nesting = 0;
8688  shadow.used.i = 0;
8689  shadow.used.fp = 0;
8690  shadow.used.mem = 0;
8691  shadow.defd = shadow.used;
8692
8693  for (i = get_insns (); i ; i = NEXT_INSN (i))
8694    {
8695      if (NOTE_P (i))
8696	{
8697	  switch (NOTE_KIND (i))
8698	    {
8699	    case NOTE_INSN_EH_REGION_BEG:
8700	      exception_nesting++;
8701	      if (trap_pending)
8702		goto close_shadow;
8703	      break;
8704
8705	    case NOTE_INSN_EH_REGION_END:
8706	      exception_nesting--;
8707	      if (trap_pending)
8708		goto close_shadow;
8709	      break;
8710
8711	    case NOTE_INSN_EPILOGUE_BEG:
8712	      if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8713		goto close_shadow;
8714	      break;
8715	    }
8716	}
8717      else if (trap_pending)
8718	{
8719	  if (alpha_tp == ALPHA_TP_FUNC)
8720	    {
8721	      if (JUMP_P (i)
8722		  && GET_CODE (PATTERN (i)) == RETURN)
8723		goto close_shadow;
8724	    }
8725	  else if (alpha_tp == ALPHA_TP_INSN)
8726	    {
8727	      if (optimize > 0)
8728		{
8729		  struct shadow_summary sum;
8730
8731		  sum.used.i = 0;
8732		  sum.used.fp = 0;
8733		  sum.used.mem = 0;
8734		  sum.defd = sum.used;
8735
8736		  switch (GET_CODE (i))
8737		    {
8738		    case INSN:
8739		      /* Annoyingly, get_attr_trap will die on these.  */
8740		      if (GET_CODE (PATTERN (i)) == USE
8741			  || GET_CODE (PATTERN (i)) == CLOBBER)
8742			break;
8743
8744		      summarize_insn (PATTERN (i), &sum, 0);
8745
8746		      if ((sum.defd.i & shadow.defd.i)
8747			  || (sum.defd.fp & shadow.defd.fp))
8748			{
8749			  /* (c) would be violated */
8750			  goto close_shadow;
8751			}
8752
8753		      /* Combine shadow with summary of current insn: */
8754		      shadow.used.i   |= sum.used.i;
8755		      shadow.used.fp  |= sum.used.fp;
8756		      shadow.used.mem |= sum.used.mem;
8757		      shadow.defd.i   |= sum.defd.i;
8758		      shadow.defd.fp  |= sum.defd.fp;
8759		      shadow.defd.mem |= sum.defd.mem;
8760
8761		      if ((sum.defd.i & shadow.used.i)
8762			  || (sum.defd.fp & shadow.used.fp)
8763			  || (sum.defd.mem & shadow.used.mem))
8764			{
8765			  /* (a) would be violated (also takes care of (b))  */
8766			  gcc_assert (get_attr_trap (i) != TRAP_YES
8767				      || (!(sum.defd.i & sum.used.i)
8768					  && !(sum.defd.fp & sum.used.fp)));
8769
8770			  goto close_shadow;
8771			}
8772		      break;
8773
8774		    case BARRIER:
8775		      /* __builtin_unreachable can expand to no code at all,
8776			 leaving (barrier) RTXes in the instruction stream.  */
8777		      goto close_shadow_notrapb;
8778
8779		    case JUMP_INSN:
8780		    case CALL_INSN:
8781		    case CODE_LABEL:
8782		      goto close_shadow;
8783
8784		    default:
8785		      gcc_unreachable ();
8786		    }
8787		}
8788	      else
8789		{
8790		close_shadow:
8791		  n = emit_insn_before (gen_trapb (), i);
8792		  PUT_MODE (n, TImode);
8793		  PUT_MODE (i, TImode);
8794		close_shadow_notrapb:
8795		  trap_pending = 0;
8796		  shadow.used.i = 0;
8797		  shadow.used.fp = 0;
8798		  shadow.used.mem = 0;
8799		  shadow.defd = shadow.used;
8800		}
8801	    }
8802	}
8803
8804      if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8805	  && NONJUMP_INSN_P (i)
8806	  && GET_CODE (PATTERN (i)) != USE
8807	  && GET_CODE (PATTERN (i)) != CLOBBER
8808	  && get_attr_trap (i) == TRAP_YES)
8809	{
8810	  if (optimize && !trap_pending)
8811	    summarize_insn (PATTERN (i), &shadow, 0);
8812	  trap_pending = 1;
8813	}
8814    }
8815}
8816
8817/* Alpha can only issue instruction groups simultaneously if they are
8818   suitably aligned.  This is very processor-specific.  */
8819/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8820   that are marked "fake".  These instructions do not exist on that target,
8821   but it is possible to see these insns with deranged combinations of
8822   command-line options, such as "-mtune=ev4 -mmax".  Instead of aborting,
8823   choose a result at random.  */
8824
8825enum alphaev4_pipe {
8826  EV4_STOP = 0,
8827  EV4_IB0 = 1,
8828  EV4_IB1 = 2,
8829  EV4_IBX = 4
8830};
8831
8832enum alphaev5_pipe {
8833  EV5_STOP = 0,
8834  EV5_NONE = 1,
8835  EV5_E01 = 2,
8836  EV5_E0 = 4,
8837  EV5_E1 = 8,
8838  EV5_FAM = 16,
8839  EV5_FA = 32,
8840  EV5_FM = 64
8841};
8842
8843static enum alphaev4_pipe
8844alphaev4_insn_pipe (rtx_insn *insn)
8845{
8846  if (recog_memoized (insn) < 0)
8847    return EV4_STOP;
8848  if (get_attr_length (insn) != 4)
8849    return EV4_STOP;
8850
8851  switch (get_attr_type (insn))
8852    {
8853    case TYPE_ILD:
8854    case TYPE_LDSYM:
8855    case TYPE_FLD:
8856    case TYPE_LD_L:
8857      return EV4_IBX;
8858
8859    case TYPE_IADD:
8860    case TYPE_ILOG:
8861    case TYPE_ICMOV:
8862    case TYPE_ICMP:
8863    case TYPE_FST:
8864    case TYPE_SHIFT:
8865    case TYPE_IMUL:
8866    case TYPE_FBR:
8867    case TYPE_MVI:		/* fake */
8868      return EV4_IB0;
8869
8870    case TYPE_IST:
8871    case TYPE_MISC:
8872    case TYPE_IBR:
8873    case TYPE_JSR:
8874    case TYPE_CALLPAL:
8875    case TYPE_FCPYS:
8876    case TYPE_FCMOV:
8877    case TYPE_FADD:
8878    case TYPE_FDIV:
8879    case TYPE_FMUL:
8880    case TYPE_ST_C:
8881    case TYPE_MB:
8882    case TYPE_FSQRT:		/* fake */
8883    case TYPE_FTOI:		/* fake */
8884    case TYPE_ITOF:		/* fake */
8885      return EV4_IB1;
8886
8887    default:
8888      gcc_unreachable ();
8889    }
8890}
8891
8892static enum alphaev5_pipe
8893alphaev5_insn_pipe (rtx_insn *insn)
8894{
8895  if (recog_memoized (insn) < 0)
8896    return EV5_STOP;
8897  if (get_attr_length (insn) != 4)
8898    return EV5_STOP;
8899
8900  switch (get_attr_type (insn))
8901    {
8902    case TYPE_ILD:
8903    case TYPE_FLD:
8904    case TYPE_LDSYM:
8905    case TYPE_IADD:
8906    case TYPE_ILOG:
8907    case TYPE_ICMOV:
8908    case TYPE_ICMP:
8909      return EV5_E01;
8910
8911    case TYPE_IST:
8912    case TYPE_FST:
8913    case TYPE_SHIFT:
8914    case TYPE_IMUL:
8915    case TYPE_MISC:
8916    case TYPE_MVI:
8917    case TYPE_LD_L:
8918    case TYPE_ST_C:
8919    case TYPE_MB:
8920    case TYPE_FTOI:		/* fake */
8921    case TYPE_ITOF:		/* fake */
8922      return EV5_E0;
8923
8924    case TYPE_IBR:
8925    case TYPE_JSR:
8926    case TYPE_CALLPAL:
8927      return EV5_E1;
8928
8929    case TYPE_FCPYS:
8930      return EV5_FAM;
8931
8932    case TYPE_FBR:
8933    case TYPE_FCMOV:
8934    case TYPE_FADD:
8935    case TYPE_FDIV:
8936    case TYPE_FSQRT:		/* fake */
8937      return EV5_FA;
8938
8939    case TYPE_FMUL:
8940      return EV5_FM;
8941
8942    default:
8943      gcc_unreachable ();
8944    }
8945}
8946
8947/* IN_USE is a mask of the slots currently filled within the insn group.
8948   The mask bits come from alphaev4_pipe above.  If EV4_IBX is set, then
8949   the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8950
8951   LEN is, of course, the length of the group in bytes.  */
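
/* For example (a sketch): an add-class insn (EV4_IB0) followed by a
   store (EV4_IB1) fills both slots, so the walk below stops with
   LEN == 8; a second add-class insn does not fit and begins the next
   group.  A load is EV4_IBX and may be counted against either slot.  */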
8952
8953static rtx_insn *
8954alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
8955{
8956  int len, in_use;
8957
8958  len = in_use = 0;
8959
8960  if (! INSN_P (insn)
8961      || GET_CODE (PATTERN (insn)) == CLOBBER
8962      || GET_CODE (PATTERN (insn)) == USE)
8963    goto next_and_done;
8964
8965  while (1)
8966    {
8967      enum alphaev4_pipe pipe;
8968
8969      pipe = alphaev4_insn_pipe (insn);
8970      switch (pipe)
8971	{
8972	case EV4_STOP:
8973	  /* Force complex instructions to start new groups.  */
8974	  if (in_use)
8975	    goto done;
8976
8977	  /* If this is a completely unrecognized insn, it's an asm.
8978	     We don't know how long it is, so record length as -1 to
8979	     signal a needed realignment.  */
8980	  if (recog_memoized (insn) < 0)
8981	    len = -1;
8982	  else
8983	    len = get_attr_length (insn);
8984	  goto next_and_done;
8985
8986	case EV4_IBX:
8987	  if (in_use & EV4_IB0)
8988	    {
8989	      if (in_use & EV4_IB1)
8990		goto done;
8991	      in_use |= EV4_IB1;
8992	    }
8993	  else
8994	    in_use |= EV4_IB0 | EV4_IBX;
8995	  break;
8996
8997	case EV4_IB0:
8998	  if (in_use & EV4_IB0)
8999	    {
9000	      if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
9001		goto done;
9002	      in_use |= EV4_IB1;
9003	    }
9004	  in_use |= EV4_IB0;
9005	  break;
9006
9007	case EV4_IB1:
9008	  if (in_use & EV4_IB1)
9009	    goto done;
9010	  in_use |= EV4_IB1;
9011	  break;
9012
9013	default:
9014	  gcc_unreachable ();
9015	}
9016      len += 4;
9017
9018      /* Haifa doesn't do well scheduling branches.  */
9019      if (JUMP_P (insn))
9020	goto next_and_done;
9021
9022    next:
9023      insn = next_nonnote_insn (insn);
9024
9025      if (!insn || ! INSN_P (insn))
9026	goto done;
9027
9028      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9029      if (GET_MODE (insn) == TImode)
9030	goto done;
9031
9032      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9033	goto next;
9034    }
9035
9036 next_and_done:
9037  insn = next_nonnote_insn (insn);
9038
9039 done:
9040  *plen = len;
9041  *pin_use = in_use;
9042  return insn;
9043}
9044
9045/* IN_USE is a mask of the slots currently filled within the insn group.
9046   The mask bits come from alphaev5_pipe above.  If EV5_E01 is set, then
9047   the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9048
9049   LEN is, of course, the length of the group in bytes.  */
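/* Likewise, after one E01-capable insn the mask is EV5_E0 | EV5_E01,
   so a later E0-only insn may still be slotted by letting the earlier
   insn issue on E1.  The floating pipes are handled more conservatively:
   an FCPYS (EV5_FAM) takes FA if it is free, FM otherwise, and is not
   moved afterwards.  */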
9050
9051static rtx_insn *
9052alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9053{
9054  int len, in_use;
9055
9056  len = in_use = 0;
9057
9058  if (! INSN_P (insn)
9059      || GET_CODE (PATTERN (insn)) == CLOBBER
9060      || GET_CODE (PATTERN (insn)) == USE)
9061    goto next_and_done;
9062
9063  while (1)
9064    {
9065      enum alphaev5_pipe pipe;
9066
9067      pipe = alphaev5_insn_pipe (insn);
9068      switch (pipe)
9069	{
9070	case EV5_STOP:
9071	  /* Force complex instructions to start new groups.  */
9072	  if (in_use)
9073	    goto done;
9074
9075	  /* If this is a completely unrecognized insn, it's an asm.
9076	     We don't know how long it is, so record length as -1 to
9077	     signal a needed realignment.  */
9078	  if (recog_memoized (insn) < 0)
9079	    len = -1;
9080	  else
9081	    len = get_attr_length (insn);
9082	  goto next_and_done;
9083
9084	/* ??? In most of the cases below we would like to assert that this
9085	   never happens, as it would indicate an error either in Haifa or
9086	   in the scheduling description.  Unfortunately, Haifa never
9087	   schedules the last instruction of the BB, so we don't have
9088	   an accurate TI bit to go by.  */
9089	case EV5_E01:
9090	  if (in_use & EV5_E0)
9091	    {
9092	      if (in_use & EV5_E1)
9093		goto done;
9094	      in_use |= EV5_E1;
9095	    }
9096	  else
9097	    in_use |= EV5_E0 | EV5_E01;
9098	  break;
9099
9100	case EV5_E0:
9101	  if (in_use & EV5_E0)
9102	    {
9103	      if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9104		goto done;
9105	      in_use |= EV5_E1;
9106	    }
9107	  in_use |= EV5_E0;
9108	  break;
9109
9110	case EV5_E1:
9111	  if (in_use & EV5_E1)
9112	    goto done;
9113	  in_use |= EV5_E1;
9114	  break;
9115
9116	case EV5_FAM:
9117	  if (in_use & EV5_FA)
9118	    {
9119	      if (in_use & EV5_FM)
9120		goto done;
9121	      in_use |= EV5_FM;
9122	    }
9123	  else
9124	    in_use |= EV5_FA | EV5_FAM;
9125	  break;
9126
9127	case EV5_FA:
9128	  if (in_use & EV5_FA)
9129	    goto done;
9130	  in_use |= EV5_FA;
9131	  break;
9132
9133	case EV5_FM:
9134	  if (in_use & EV5_FM)
9135	    goto done;
9136	  in_use |= EV5_FM;
9137	  break;
9138
9139	case EV5_NONE:
9140	  break;
9141
9142	default:
9143	  gcc_unreachable ();
9144	}
9145      len += 4;
9146
9147      /* Haifa doesn't do well scheduling branches.  */
9148      /* ??? If this is predicted not-taken, slotting continues, except
9149	 that no more IBR, FBR, or JSR insns may be slotted.  */
9150      if (JUMP_P (insn))
9151	goto next_and_done;
9152
9153    next:
9154      insn = next_nonnote_insn (insn);
9155
9156      if (!insn || ! INSN_P (insn))
9157	goto done;
9158
9159      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9160      if (GET_MODE (insn) == TImode)
9161	goto done;
9162
9163      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9164	goto next;
9165    }
9166
9167 next_and_done:
9168  insn = next_nonnote_insn (insn);
9169
9170 done:
9171  *plen = len;
9172  *pin_use = in_use;
9173  return insn;
9174}
9175
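/* The two helpers below pick a nop for the next free issue slot
   recorded in *PIN_USE: an integer nop if an integer slot is open, an
   fnop if only a floating-point slot remains and the FP unit exists,
   and a plain unop otherwise.  Any slot claimed is marked in *PIN_USE.  */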
9176static rtx
9177alphaev4_next_nop (int *pin_use)
9178{
9179  int in_use = *pin_use;
9180  rtx nop;
9181
9182  if (!(in_use & EV4_IB0))
9183    {
9184      in_use |= EV4_IB0;
9185      nop = gen_nop ();
9186    }
9187  else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9188    {
9189      in_use |= EV4_IB1;
9190      nop = gen_nop ();
9191    }
9192  else if (TARGET_FP && !(in_use & EV4_IB1))
9193    {
9194      in_use |= EV4_IB1;
9195      nop = gen_fnop ();
9196    }
9197  else
9198    nop = gen_unop ();
9199
9200  *pin_use = in_use;
9201  return nop;
9202}
9203
9204static rtx
9205alphaev5_next_nop (int *pin_use)
9206{
9207  int in_use = *pin_use;
9208  rtx nop;
9209
9210  if (!(in_use & EV5_E1))
9211    {
9212      in_use |= EV5_E1;
9213      nop = gen_nop ();
9214    }
9215  else if (TARGET_FP && !(in_use & EV5_FA))
9216    {
9217      in_use |= EV5_FA;
9218      nop = gen_fnop ();
9219    }
9220  else if (TARGET_FP && !(in_use & EV5_FM))
9221    {
9222      in_use |= EV5_FM;
9223      nop = gen_fnop ();
9224    }
9225  else
9226    nop = gen_unop ();
9227
9228  *pin_use = in_use;
9229  return nop;
9230}
9231
9232/* The instruction group alignment main loop.  */
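/* Walk the insn stream keeping track of OFS, the byte offset of the
   current issue group within an ALIGN-byte aligned block.  Whenever a
   group would straddle a block boundary, pad up to the boundary with
   nops chosen by NEXT_NOP so that they issue for free in the previous
   group; when alignment knowledge is insufficient or lost (e.g. asms),
   fall back to an explicit realignment or to the minimum alignment.  */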
9233
9234static void
9235alpha_align_insns_1 (unsigned int max_align,
9236		     rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9237		     rtx (*next_nop) (int *))
9238{
9239  /* ALIGN is the known alignment for the insn group.  */
9240  unsigned int align;
9241  /* OFS is the offset of the current insn in the insn group.  */
9242  int ofs;
9243  int prev_in_use, in_use, len, ldgp;
9244  rtx_insn *i, *next;
9245
9246  /* Let shorten_branches take care of assigning alignments to code labels.  */
9247  shorten_branches (get_insns ());
9248
9249  if (align_functions < 4)
9250    align = 4;
9251  else if ((unsigned int) align_functions < max_align)
9252    align = align_functions;
9253  else
9254    align = max_align;
9255
9256  ofs = prev_in_use = 0;
9257  i = get_insns ();
9258  if (NOTE_P (i))
9259    i = next_nonnote_insn (i);
9260
9261  ldgp = alpha_function_needs_gp ? 8 : 0;
9262
9263  while (i)
9264    {
9265      next = (*next_group) (i, &in_use, &len);
9266
9267      /* When we see a label, resync alignment etc.  */
9268      if (LABEL_P (i))
9269	{
9270	  unsigned int new_align = 1 << label_to_alignment (i);
9271
9272	  if (new_align >= align)
9273	    {
9274	      align = new_align < max_align ? new_align : max_align;
9275	      ofs = 0;
9276	    }
9277
9278	  else if (ofs & (new_align-1))
9279	    ofs = (ofs | (new_align-1)) + 1;
9280	  gcc_assert (!len);
9281	}
9282
9283      /* Handle complex instructions specially.  */
9284      else if (in_use == 0)
9285	{
9286	  /* Asms will have length < 0.  This is a signal that we have
9287	     lost alignment knowledge.  Assume, however, that the asm
9288	     will not mis-align instructions.  */
9289	  if (len < 0)
9290	    {
9291	      ofs = 0;
9292	      align = 4;
9293	      len = 0;
9294	    }
9295	}
9296
9297      /* If the known alignment is smaller than the recognized insn group,
9298	 realign the output.  */
9299      else if ((int) align < len)
9300	{
9301	  unsigned int new_log_align = len > 8 ? 4 : 3;
9302	  rtx_insn *prev, *where;
9303
9304	  where = prev = prev_nonnote_insn (i);
9305	  if (!where || !LABEL_P (where))
9306	    where = i;
9307
9308	  /* Can't realign between a call and its gp reload.  */
9309	  if (! (TARGET_EXPLICIT_RELOCS
9310		 && prev && CALL_P (prev)))
9311	    {
9312	      emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9313	      align = 1 << new_log_align;
9314	      ofs = 0;
9315	    }
9316	}
9317
9318      /* We may not insert padding inside the initial ldgp sequence.  */
9319      else if (ldgp > 0)
9320	ldgp -= len;
9321
9322      /* If the group won't fit in the same INT16 as the previous,
9323	 we need to add padding to keep the group together.  Rather
9324	 than simply leaving the insn filling to the assembler, we
9325	 can make use of the knowledge of what sorts of instructions
9326	 were issued in the previous group to make sure that all of
9327	 the added nops are really free.  */
9328      else if (ofs + len > (int) align)
9329	{
9330	  int nop_count = (align - ofs) / 4;
9331	  rtx_insn *where;
9332
9333	  /* Insert nops before labels, branches, and calls to truly merge
9334	     the execution of the nops with the previous instruction group.  */
9335	  where = prev_nonnote_insn (i);
9336	  if (where)
9337	    {
9338	      if (LABEL_P (where))
9339		{
9340		  rtx_insn *where2 = prev_nonnote_insn (where);
9341		  if (where2 && JUMP_P (where2))
9342		    where = where2;
9343		}
9344	      else if (NONJUMP_INSN_P (where))
9345		where = i;
9346	    }
9347	  else
9348	    where = i;
9349
9350	  do
9351	    emit_insn_before ((*next_nop)(&prev_in_use), where);
9352	  while (--nop_count);
9353	  ofs = 0;
9354	}
9355
9356      ofs = (ofs + len) & (align - 1);
9357      prev_in_use = in_use;
9358      i = next;
9359    }
9360}
9361
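/* EV4 issues from aligned 8-byte fetch blocks (two insns per cycle)
   and EV5 from aligned 16-byte blocks (four insns), hence the
   different alignment limits passed down.  */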
9362static void
9363alpha_align_insns (void)
9364{
9365  if (alpha_tune == PROCESSOR_EV4)
9366    alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9367  else if (alpha_tune == PROCESSOR_EV5)
9368    alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9369  else
9370    gcc_unreachable ();
9371}
9372
9373/* Insert an unop between sibcall or noreturn function call and GP load.  */
9374
9375static void
9376alpha_pad_function_end (void)
9377{
9378  rtx_insn *insn, *next;
9379
9380  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9381    {
9382      if (!CALL_P (insn)
9383	  || !(SIBLING_CALL_P (insn)
9384	       || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9385        continue;
9386
9387      /* Make sure we do not split a call and its corresponding
9388	 CALL_ARG_LOCATION note.  */
9389      next = NEXT_INSN (insn);
9390      if (next == NULL)
9391	continue;
9392      if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9393	insn = next;
9394
9395      next = next_active_insn (insn);
9396      if (next)
9397	{
9398	  rtx pat = PATTERN (next);
9399
9400	  if (GET_CODE (pat) == SET
9401	      && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9402	      && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9403	    emit_insn_after (gen_unop (), insn);
9404	}
9405    }
9406}
9407
9408/* Machine dependent reorg pass.  */
9409
9410static void
9411alpha_reorg (void)
9412{
9413  /* Workaround for a linker error that triggers when an exception
9414     handler immediately follows a sibcall or a noreturn function.
9415
9416     In the sibcall case:
9417
9418     The instruction stream from an object file:
9419
9420 1d8:   00 00 fb 6b     jmp     (t12)
9421 1dc:   00 00 ba 27     ldah    gp,0(ra)
9422 1e0:   00 00 bd 23     lda     gp,0(gp)
9423 1e4:   00 00 7d a7     ldq     t12,0(gp)
9424 1e8:   00 40 5b 6b     jsr     ra,(t12),1ec <__funcZ+0x1ec>
9425
9426     was converted in the final link pass to:
9427
9428   12003aa88:   67 fa ff c3     br      120039428 <...>
9429   12003aa8c:   00 00 fe 2f     unop
9430   12003aa90:   00 00 fe 2f     unop
9431   12003aa94:   48 83 7d a7     ldq     t12,-31928(gp)
9432   12003aa98:   00 40 5b 6b     jsr     ra,(t12),12003aa9c <__func+0x1ec>
9433
9434     And in the noreturn case:
9435
9436     The instruction stream from an object file:
9437
9438  54:   00 40 5b 6b     jsr     ra,(t12),58 <__func+0x58>
9439  58:   00 00 ba 27     ldah    gp,0(ra)
9440  5c:   00 00 bd 23     lda     gp,0(gp)
9441  60:   00 00 7d a7     ldq     t12,0(gp)
9442  64:   00 40 5b 6b     jsr     ra,(t12),68 <__func+0x68>
9443
9444     was converted in the final link pass to:
9445
9446   fdb24:       a0 03 40 d3     bsr     ra,fe9a8 <_called_func+0x8>
9447   fdb28:       00 00 fe 2f     unop
9448   fdb2c:       00 00 fe 2f     unop
9449   fdb30:       30 82 7d a7     ldq     t12,-32208(gp)
9450   fdb34:       00 40 5b 6b     jsr     ra,(t12),fdb38 <__func+0x68>
9451
9452     GP load instructions were wrongly cleared by the linker relaxation
9453     pass.  This workaround prevents removal of GP loads by inserting
9454     an unop instruction between a sibcall or noreturn function call and
9455     exception handler prologue.  */
9456
9457  if (current_function_has_exception_handlers ())
9458    alpha_pad_function_end ();
9459}
9460
9461static void
9462alpha_file_start (void)
9463{
9464  default_file_start ();
9465
9466  fputs ("\t.set noreorder\n", asm_out_file);
9467  fputs ("\t.set volatile\n", asm_out_file);
9468  if (TARGET_ABI_OSF)
9469    fputs ("\t.set noat\n", asm_out_file);
9470  if (TARGET_EXPLICIT_RELOCS)
9471    fputs ("\t.set nomacro\n", asm_out_file);
9472  if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9473    {
9474      const char *arch;
9475
9476      if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9477	arch = "ev6";
9478      else if (TARGET_MAX)
9479	arch = "pca56";
9480      else if (TARGET_BWX)
9481	arch = "ev56";
9482      else if (alpha_cpu == PROCESSOR_EV5)
9483	arch = "ev5";
9484      else
9485	arch = "ev4";
9486
9487      fprintf (asm_out_file, "\t.arch %s\n", arch);
9488    }
9489}
9490
9491/* Since we don't have a .dynbss section, we should not allow global
9492   relocations in the .rodata section.  */
9493
9494static int
9495alpha_elf_reloc_rw_mask (void)
9496{
9497  return flag_pic ? 3 : 2;
9498}
9499
9500/* Return a section for X.  The only special thing we do here is to
9501   honor small data.  */
9502
9503static section *
9504alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9505			      unsigned HOST_WIDE_INT align)
9506{
9507  if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9508    /* ??? Consider using mergeable sdata sections.  */
9509    return sdata_section;
9510  else
9511    return default_elf_select_rtx_section (mode, x, align);
9512}
9513
9514static unsigned int
9515alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9516{
9517  unsigned int flags = 0;
9518
9519  if (strcmp (name, ".sdata") == 0
9520      || strncmp (name, ".sdata.", 7) == 0
9521      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9522      || strcmp (name, ".sbss") == 0
9523      || strncmp (name, ".sbss.", 6) == 0
9524      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9525    flags = SECTION_SMALL;
9526
9527  flags |= default_section_type_flags (decl, name, reloc);
9528  return flags;
9529}
9530
9531/* Structure to collect function names for final output in link section.  */
9532/* Note that items marked with GTY can't be ifdef'ed out.  */
9533
9534enum reloc_kind
9535{
9536  KIND_LINKAGE,
9537  KIND_CODEADDR
9538};
9539
9540struct GTY(()) alpha_links
9541{
9542  rtx func;
9543  rtx linkage;
9544  enum reloc_kind rkind;
9545};
9546
9547#if TARGET_ABI_OPEN_VMS
9548
9549/* Return the VMS argument type corresponding to MODE.  */
9550
9551enum avms_arg_type
9552alpha_arg_type (machine_mode mode)
9553{
9554  switch (mode)
9555    {
9556    case SFmode:
9557      return TARGET_FLOAT_VAX ? FF : FS;
9558    case DFmode:
9559      return TARGET_FLOAT_VAX ? FD : FT;
9560    default:
9561      return I64;
9562    }
9563}
9564
9565/* Return an rtx for an integer representing the VMS Argument Information
9566   register value.  */
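/* For example, a three-argument call puts 3 in bits 0..7 of the AI
   value, with the 3-bit VMS type code of argument I in bits I*3+8
   through I*3+10.  */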
9567
9568rtx
9569alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9570{
9571  unsigned HOST_WIDE_INT regval = cum.num_args;
9572  int i;
9573
9574  for (i = 0; i < 6; i++)
9575    regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9576
9577  return GEN_INT (regval);
9578}
9579
9580
9581/* Return a SYMBOL_REF representing the reference to the .linkage entry
9582   of function FUNC built for calls made from CFUNDECL.  LFLAG is 1 if
9583   this is the reference to the linkage pointer value, 0 if this is the
9584   reference to the function entry value.  RFLAG is 1 if this is a reduced
9585   reference (code address only), 0 if this is a full reference.  */
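/* The linkage symbol built below has the form "$N..NAME..lk",
   e.g. "$5..FOO..lk" for funcdef_no 5 and a callee FOO.  */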
9586
9587rtx
9588alpha_use_linkage (rtx func, bool lflag, bool rflag)
9589{
9590  struct alpha_links *al = NULL;
9591  const char *name = XSTR (func, 0);
9592
9593  if (cfun->machine->links)
9594    {
9595      /* Is this name already defined?  */
9596      alpha_links **slot = cfun->machine->links->get (name);
9597      if (slot)
9598	al = *slot;
9599    }
9600  else
9601    cfun->machine->links
9602      = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64);
9603
9604  if (al == NULL)
9605    {
9606      size_t buf_len;
9607      char *linksym;
9608      tree id;
9609
9610      if (name[0] == '*')
9611	name++;
9612
9613      /* Follow transparent alias, as this is used for CRTL translations.  */
9614      id = maybe_get_identifier (name);
9615      if (id)
9616        {
9617          while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9618            id = TREE_CHAIN (id);
9619          name = IDENTIFIER_POINTER (id);
9620        }
9621
9622      buf_len = strlen (name) + 8 + 9;
9623      linksym = (char *) alloca (buf_len);
9624      snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9625
9626      al = ggc_alloc<alpha_links> ();
9627      al->func = func;
9628      al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9629
9630      cfun->machine->links->put (ggc_strdup (name), al);
9631    }
9632
9633  al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9634
9635  if (lflag)
9636    return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9637  else
9638    return al->linkage;
9639}
9640
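/* Output one entry of the current function's linkage table to STREAM:
   the internal ..lk label, followed by either a .code_address request,
   a locally built linkage pair, or an external .linkage request.
   Called for each element of cfun->machine->links by
   alpha_write_linkage below.  */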
9641static int
9642alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9643{
9644  ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9645  if (link->rkind == KIND_CODEADDR)
9646    {
9647      /* External and used, request code address.  */
9648      fprintf (stream, "\t.code_address ");
9649    }
9650  else
9651    {
9652      if (!SYMBOL_REF_EXTERNAL_P (link->func)
9653          && SYMBOL_REF_LOCAL_P (link->func))
9654	{
9655	  /* Locally defined, build linkage pair.  */
9656	  fprintf (stream, "\t.quad %s..en\n", name);
9657	  fprintf (stream, "\t.quad ");
9658	}
9659      else
9660	{
9661	  /* External, request linkage pair.  */
9662	  fprintf (stream, "\t.linkage ");
9663	}
9664    }
9665  assemble_name (stream, name);
9666  fputs ("\n", stream);
9667
9668  return 0;
9669}
9670
9671static void
9672alpha_write_linkage (FILE *stream, const char *funname)
9673{
9674  fprintf (stream, "\t.link\n");
9675  fprintf (stream, "\t.align 3\n");
9676  in_section = NULL;
9677
9678#ifdef TARGET_VMS_CRASH_DEBUG
9679  fputs ("\t.name ", stream);
9680  assemble_name (stream, funname);
9681  fputs ("..na\n", stream);
9682#endif
9683
9684  ASM_OUTPUT_LABEL (stream, funname);
9685  fprintf (stream, "\t.pdesc ");
9686  assemble_name (stream, funname);
9687  fprintf (stream, "..en,%s\n",
9688	   alpha_procedure_type == PT_STACK ? "stack"
9689	   : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9690
9691  if (cfun->machine->links)
9692    {
9693      hash_map<nofree_string_hash, alpha_links *>::iterator iter
9694	= cfun->machine->links->begin ();
9695      for (; iter != cfun->machine->links->end (); ++iter)
9696	alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9697    }
9698}
9699
9700/* Switch to an arbitrary section NAME with attributes as specified
9701   by FLAGS.  DECL is the declaration associated with the section,
9702   if any; it is unused here.  */
9703
9704static void
9705vms_asm_named_section (const char *name, unsigned int flags,
9706		       tree decl ATTRIBUTE_UNUSED)
9707{
9708  fputc ('\n', asm_out_file);
9709  fprintf (asm_out_file, ".section\t%s", name);
9710
9711  if (flags & SECTION_DEBUG)
9712    fprintf (asm_out_file, ",NOWRT");
9713
9714  fputc ('\n', asm_out_file);
9715}
9716
9717/* Record an element in the table of global constructors.  SYMBOL is
9718   a SYMBOL_REF of the function to be called; PRIORITY is a number
9719   between 0 and MAX_INIT_PRIORITY.
9720
9721   Differs from default_ctors_section_asm_out_constructor in that the
9722   width of the .ctors entry is always 64 bits, rather than the 32 bits
9723   used by a normal pointer.  */
9724
9725static void
9726vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9727{
9728  switch_to_section (ctors_section);
9729  assemble_align (BITS_PER_WORD);
9730  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9731}
9732
9733static void
9734vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9735{
9736  switch_to_section (dtors_section);
9737  assemble_align (BITS_PER_WORD);
9738  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9739}
9740#else
9741rtx
9742alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9743		   bool lflag ATTRIBUTE_UNUSED,
9744		   bool rflag ATTRIBUTE_UNUSED)
9745{
9746  return NULL_RTX;
9747}
9748
9749#endif /* TARGET_ABI_OPEN_VMS */
9750
9751static void
9752alpha_init_libfuncs (void)
9753{
9754  if (TARGET_ABI_OPEN_VMS)
9755    {
9756      /* Use the VMS runtime library functions for division and
9757	 remainder.  */
9758      set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9759      set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9760      set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9761      set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9762      set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9763      set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9764      set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9765      set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9766#ifdef MEM_LIBFUNCS_INIT
9767      MEM_LIBFUNCS_INIT;
9768#endif
9769    }
9770}
9771
9772/* On the Alpha, we use this to disable the floating-point registers
9773   when they don't exist.  */
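/* Hard registers 32..62 are $f0..$f30; $f31 (register 63) is the
   floating-point zero register and is fixed regardless.  */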
9774
9775static void
9776alpha_conditional_register_usage (void)
9777{
9778  int i;
9779  if (! TARGET_FPREGS)
9780    for (i = 32; i < 63; i++)
9781      fixed_regs[i] = call_used_regs[i] = 1;
9782}
9783
9784/* Canonicalize a comparison from one we don't have to one we do have.  */
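/* For example, (gt:DI x y) becomes (lt:DI y x) when the value of OP0
   need not be preserved, and (lt:DI x 256) becomes (le:DI x 255) so
   that the constant fits the 8-bit literal field of the compare
   instructions.  */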
9785
9786static void
9787alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9788			       bool op0_preserve_value)
9789{
9790  if (!op0_preserve_value
9791      && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9792      && (REG_P (*op1) || *op1 == const0_rtx))
9793    {
9794      rtx tem = *op0;
9795      *op0 = *op1;
9796      *op1 = tem;
9797      *code = (int)swap_condition ((enum rtx_code)*code);
9798    }
9799
9800  if ((*code == LT || *code == LTU)
9801      && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9802    {
9803      *code = *code == LT ? LE : LEU;
9804      *op1 = GEN_INT (255);
9805    }
9806}
9807
9808/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
9809
9810static void
9811alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9812{
9813  const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
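  /* Bits 17..22 of the software FP control word returned by
     __ieee_get_fp_control are the accrued exception status flags; only
     those are cleared below, the rest of the control word is
     preserved.  */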
9814
9815  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9816  tree new_fenv_var, reload_fenv, restore_fnenv;
9817  tree update_call, atomic_feraiseexcept, hold_fnclex;
9818
9819  /* Assume OSF/1 compatible interfaces.  */
9820  if (!TARGET_ABI_OSF)
9821    return;
9822
9823  /* Generate the equivalent of:
9824       unsigned long fenv_var;
9825       fenv_var = __ieee_get_fp_control ();
9826
9827       unsigned long masked_fenv;
9828       masked_fenv = fenv_var & mask;
9829
9830       __ieee_set_fp_control (masked_fenv);  */
9831
9832  fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9833  get_fpscr
9834    = build_fn_decl ("__ieee_get_fp_control",
9835		     build_function_type_list (long_unsigned_type_node, NULL));
9836  set_fpscr
9837    = build_fn_decl ("__ieee_set_fp_control",
9838		     build_function_type_list (void_type_node, NULL));
9839  mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9840  ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
9841		    fenv_var, build_call_expr (get_fpscr, 0));
9842  masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9843  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9844  *hold = build2 (COMPOUND_EXPR, void_type_node,
9845		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9846		  hold_fnclex);
9847
9848  /* Store the value of masked_fenv to clear the exceptions:
9849     __ieee_set_fp_control (masked_fenv);  */
9850
9851  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9852
9853  /* Generate the equivalent of:
9854       unsigned long new_fenv_var;
9855       new_fenv_var = __ieee_get_fp_control ();
9856
9857       __ieee_set_fp_control (fenv_var);
9858
9859       __atomic_feraiseexcept (new_fenv_var);  */
9860
9861  new_fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9862  reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
9863			build_call_expr (get_fpscr, 0));
9864  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9865  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9866  update_call
9867    = build_call_expr (atomic_feraiseexcept, 1,
9868		       fold_convert (integer_type_node, new_fenv_var));
9869  *update = build2 (COMPOUND_EXPR, void_type_node,
9870		    build2 (COMPOUND_EXPR, void_type_node,
9871			    reload_fenv, restore_fnenv), update_call);
9872}
9873
9874/* Initialize the GCC target structure.  */
9875#if TARGET_ABI_OPEN_VMS
9876# undef TARGET_ATTRIBUTE_TABLE
9877# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9878# undef TARGET_CAN_ELIMINATE
9879# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9880#endif
9881
9882#undef TARGET_IN_SMALL_DATA_P
9883#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9884
9885#undef TARGET_ASM_ALIGNED_HI_OP
9886#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9887#undef TARGET_ASM_ALIGNED_DI_OP
9888#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9889
9890/* Default unaligned ops are provided for ELF systems.  To get unaligned
9891   data for non-ELF systems, we have to turn off auto alignment.  */
9892#if TARGET_ABI_OPEN_VMS
9893#undef TARGET_ASM_UNALIGNED_HI_OP
9894#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9895#undef TARGET_ASM_UNALIGNED_SI_OP
9896#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9897#undef TARGET_ASM_UNALIGNED_DI_OP
9898#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9899#endif
9900
9901#undef  TARGET_ASM_RELOC_RW_MASK
9902#define TARGET_ASM_RELOC_RW_MASK  alpha_elf_reloc_rw_mask
9903#undef	TARGET_ASM_SELECT_RTX_SECTION
9904#define	TARGET_ASM_SELECT_RTX_SECTION  alpha_elf_select_rtx_section
9905#undef  TARGET_SECTION_TYPE_FLAGS
9906#define TARGET_SECTION_TYPE_FLAGS  alpha_elf_section_type_flags
9907
9908#undef TARGET_ASM_FUNCTION_END_PROLOGUE
9909#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9910
9911#undef TARGET_INIT_LIBFUNCS
9912#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9913
9914#undef TARGET_LEGITIMIZE_ADDRESS
9915#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9916#undef TARGET_MODE_DEPENDENT_ADDRESS_P
9917#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9918
9919#undef TARGET_ASM_FILE_START
9920#define TARGET_ASM_FILE_START alpha_file_start
9921
9922#undef TARGET_SCHED_ADJUST_COST
9923#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9924#undef TARGET_SCHED_ISSUE_RATE
9925#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9926#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9927#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9928  alpha_multipass_dfa_lookahead
9929
9930#undef TARGET_HAVE_TLS
9931#define TARGET_HAVE_TLS HAVE_AS_TLS
9932
9933#undef  TARGET_BUILTIN_DECL
9934#define TARGET_BUILTIN_DECL  alpha_builtin_decl
9935#undef  TARGET_INIT_BUILTINS
9936#define TARGET_INIT_BUILTINS alpha_init_builtins
9937#undef  TARGET_EXPAND_BUILTIN
9938#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9939#undef  TARGET_FOLD_BUILTIN
9940#define TARGET_FOLD_BUILTIN alpha_fold_builtin
9941#undef  TARGET_GIMPLE_FOLD_BUILTIN
9942#define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9943
9944#undef TARGET_FUNCTION_OK_FOR_SIBCALL
9945#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9946#undef TARGET_CANNOT_COPY_INSN_P
9947#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9948#undef TARGET_LEGITIMATE_CONSTANT_P
9949#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9950#undef TARGET_CANNOT_FORCE_CONST_MEM
9951#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9952
9953#if TARGET_ABI_OSF
9954#undef TARGET_ASM_OUTPUT_MI_THUNK
9955#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
9956#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9957#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
9958#undef TARGET_STDARG_OPTIMIZE_HOOK
9959#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
9960#endif
9961
9962#undef TARGET_PRINT_OPERAND
9963#define TARGET_PRINT_OPERAND alpha_print_operand
9964#undef TARGET_PRINT_OPERAND_ADDRESS
9965#define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
9966#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
9967#define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p
9968
9969/* Use 16-bit anchors.  */
9970#undef TARGET_MIN_ANCHOR_OFFSET
9971#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
9972#undef TARGET_MAX_ANCHOR_OFFSET
9973#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
9974#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9975#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
9976
9977#undef TARGET_REGISTER_MOVE_COST
9978#define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
9979#undef TARGET_MEMORY_MOVE_COST
9980#define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
9981#undef TARGET_RTX_COSTS
9982#define TARGET_RTX_COSTS alpha_rtx_costs
9983#undef TARGET_ADDRESS_COST
9984#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
9985
9986#undef TARGET_MACHINE_DEPENDENT_REORG
9987#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
9988
9989#undef TARGET_PROMOTE_FUNCTION_MODE
9990#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
9991#undef TARGET_PROMOTE_PROTOTYPES
9992#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
9993
9994#undef TARGET_FUNCTION_VALUE
9995#define TARGET_FUNCTION_VALUE alpha_function_value
9996#undef TARGET_LIBCALL_VALUE
9997#define TARGET_LIBCALL_VALUE alpha_libcall_value
9998#undef TARGET_FUNCTION_VALUE_REGNO_P
9999#define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
10000#undef TARGET_RETURN_IN_MEMORY
10001#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10002#undef TARGET_PASS_BY_REFERENCE
10003#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10004#undef TARGET_SETUP_INCOMING_VARARGS
10005#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10006#undef TARGET_STRICT_ARGUMENT_NAMING
10007#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10008#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10009#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10010#undef TARGET_SPLIT_COMPLEX_ARG
10011#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10012#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10013#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10014#undef TARGET_ARG_PARTIAL_BYTES
10015#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10016#undef TARGET_FUNCTION_ARG
10017#define TARGET_FUNCTION_ARG alpha_function_arg
10018#undef TARGET_FUNCTION_ARG_ADVANCE
10019#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10020#undef TARGET_TRAMPOLINE_INIT
10021#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10022
10023#undef TARGET_INSTANTIATE_DECLS
10024#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10025
10026#undef TARGET_SECONDARY_RELOAD
10027#define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10028
10029#undef TARGET_SCALAR_MODE_SUPPORTED_P
10030#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10031#undef TARGET_VECTOR_MODE_SUPPORTED_P
10032#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10033
10034#undef TARGET_BUILD_BUILTIN_VA_LIST
10035#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10036
10037#undef TARGET_EXPAND_BUILTIN_VA_START
10038#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10039
10040#undef TARGET_OPTION_OVERRIDE
10041#define TARGET_OPTION_OVERRIDE alpha_option_override
10042
10043#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10044#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10045  alpha_override_options_after_change
10046
10047#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10048#undef TARGET_MANGLE_TYPE
10049#define TARGET_MANGLE_TYPE alpha_mangle_type
10050#endif
10051
10052#undef TARGET_LRA_P
10053#define TARGET_LRA_P hook_bool_void_false
10054
10055#undef TARGET_LEGITIMATE_ADDRESS_P
10056#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10057
10058#undef TARGET_CONDITIONAL_REGISTER_USAGE
10059#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10060
10061#undef TARGET_CANONICALIZE_COMPARISON
10062#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10063
10064#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10065#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10066
10067struct gcc_target targetm = TARGET_INITIALIZER;
10068
10069
10070#include "gt-alpha.h"
10071