1/* Subroutines used for code generation on the DEC Alpha.
2   Copyright (C) 1992-2022 Free Software Foundation, Inc.
3   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21
22#define IN_TARGET_CODE 1
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "stringpool.h"
32#include "attribs.h"
33#include "memmodel.h"
34#include "gimple.h"
35#include "df.h"
36#include "predict.h"
37#include "tm_p.h"
38#include "ssa.h"
39#include "expmed.h"
40#include "optabs.h"
41#include "regs.h"
42#include "emit-rtl.h"
43#include "recog.h"
44#include "diagnostic-core.h"
45#include "alias.h"
46#include "fold-const.h"
47#include "stor-layout.h"
48#include "calls.h"
49#include "varasm.h"
50#include "output.h"
51#include "insn-attr.h"
52#include "explow.h"
53#include "expr.h"
54#include "reload.h"
55#include "except.h"
56#include "common/common-target.h"
57#include "debug.h"
58#include "langhooks.h"
59#include "cfgrtl.h"
60#include "tree-pass.h"
61#include "context.h"
62#include "gimple-iterator.h"
63#include "gimplify.h"
64#include "tree-stdarg.h"
65#include "tm-constrs.h"
66#include "libfuncs.h"
67#include "builtins.h"
68#include "rtl-iter.h"
69#include "flags.h"
70#include "opts.h"
71
72/* This file should be included last.  */
73#include "target-def.h"
74
75/* Specify which cpu to schedule for.  */
76enum processor_type alpha_tune;
77
78/* Which cpu we're generating code for.  */
79enum processor_type alpha_cpu;
80
81static const char * const alpha_cpu_name[] =
82{
83  "ev4", "ev5", "ev6"
84};
85
86/* Specify how accurate floating-point traps need to be.  */
87
88enum alpha_trap_precision alpha_tp;
89
90/* Specify the floating-point rounding mode.  */
91
92enum alpha_fp_rounding_mode alpha_fprm;
93
94/* Specify which things cause traps.  */
95
96enum alpha_fp_trap_mode alpha_fptm;
97
/* Nonzero if inside of a function, because the Alpha assembler can't
   handle .file directives inside of functions.  */
100
101static int inside_function = FALSE;
102
103/* The number of cycles of latency we should assume on memory reads.  */
104
105static int alpha_memory_latency = 3;
106
107/* Whether the function needs the GP.  */
108
109static int alpha_function_needs_gp;
110
111/* The assembler name of the current function.  */
112
113static const char *alpha_fnname;
114
115/* The next explicit relocation sequence number.  */
116extern GTY(()) int alpha_next_sequence_number;
117int alpha_next_sequence_number = 1;
118
119/* The literal and gpdisp sequence numbers for this insn, as printed
120   by %# and %* respectively.  */
121extern GTY(()) int alpha_this_literal_sequence_number;
122extern GTY(()) int alpha_this_gpdisp_sequence_number;
123int alpha_this_literal_sequence_number;
124int alpha_this_gpdisp_sequence_number;
125
126/* Costs of various operations on the different architectures.  */
127
128struct alpha_rtx_cost_data
129{
130  unsigned char fp_add;
131  unsigned char fp_mult;
132  unsigned char fp_div_sf;
133  unsigned char fp_div_df;
134  unsigned char int_mult_si;
135  unsigned char int_mult_di;
136  unsigned char int_shift;
137  unsigned char int_cmov;
138  unsigned short int_div;
139};
140
141static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
142{
143  { /* EV4 */
144    COSTS_N_INSNS (6),		/* fp_add */
145    COSTS_N_INSNS (6),		/* fp_mult */
146    COSTS_N_INSNS (34),		/* fp_div_sf */
147    COSTS_N_INSNS (63),		/* fp_div_df */
148    COSTS_N_INSNS (23),		/* int_mult_si */
149    COSTS_N_INSNS (23),		/* int_mult_di */
150    COSTS_N_INSNS (2),		/* int_shift */
151    COSTS_N_INSNS (2),		/* int_cmov */
152    COSTS_N_INSNS (97),		/* int_div */
153  },
154  { /* EV5 */
155    COSTS_N_INSNS (4),		/* fp_add */
156    COSTS_N_INSNS (4),		/* fp_mult */
157    COSTS_N_INSNS (15),		/* fp_div_sf */
158    COSTS_N_INSNS (22),		/* fp_div_df */
159    COSTS_N_INSNS (8),		/* int_mult_si */
160    COSTS_N_INSNS (12),		/* int_mult_di */
161    COSTS_N_INSNS (1) + 1,	/* int_shift */
162    COSTS_N_INSNS (1),		/* int_cmov */
163    COSTS_N_INSNS (83),		/* int_div */
164  },
165  { /* EV6 */
166    COSTS_N_INSNS (4),		/* fp_add */
167    COSTS_N_INSNS (4),		/* fp_mult */
168    COSTS_N_INSNS (12),		/* fp_div_sf */
169    COSTS_N_INSNS (15),		/* fp_div_df */
170    COSTS_N_INSNS (7),		/* int_mult_si */
171    COSTS_N_INSNS (7),		/* int_mult_di */
172    COSTS_N_INSNS (1),		/* int_shift */
173    COSTS_N_INSNS (2),		/* int_cmov */
174    COSTS_N_INSNS (86),		/* int_div */
175  },
176};
177
178/* Similar but tuned for code size instead of execution latency.  The
179   extra +N is fractional cost tuning based on latency.  It's used to
180   encourage use of cheaper insns like shift, but only if there's just
181   one of them.  */
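
/* For example, with COSTS_N_INSNS (1) == 4, a single shift (cost 4)
   is preferred to a single SImode multiply (cost 4 + 1), but two
   shifts (cost 8) are not, which is exactly the behavior the
   fractional +N terms are meant to produce when optimizing for
   size.  */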
182
183static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
184{
185  COSTS_N_INSNS (1),		/* fp_add */
186  COSTS_N_INSNS (1),		/* fp_mult */
187  COSTS_N_INSNS (1),		/* fp_div_sf */
188  COSTS_N_INSNS (1) + 1,	/* fp_div_df */
189  COSTS_N_INSNS (1) + 1,	/* int_mult_si */
190  COSTS_N_INSNS (1) + 2,	/* int_mult_di */
191  COSTS_N_INSNS (1),		/* int_shift */
192  COSTS_N_INSNS (1),		/* int_cmov */
193  COSTS_N_INSNS (6),		/* int_div */
194};
195
196/* Get the number of args of a function in one of two ways.  */
197#if TARGET_ABI_OPEN_VMS
198#define NUM_ARGS crtl->args.info.num_args
199#else
200#define NUM_ARGS crtl->args.info
201#endif
202
203#define REG_PV 27
204#define REG_RA 26
205
206/* Declarations of static functions.  */
207static struct machine_function *alpha_init_machine_status (void);
208static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
209static void alpha_handle_trap_shadows (void);
210static void alpha_align_insns (void);
211static void alpha_override_options_after_change (void);
212
213#if TARGET_ABI_OPEN_VMS
214static void alpha_write_linkage (FILE *, const char *);
215static bool vms_valid_pointer_mode (scalar_int_mode);
216#else
217#define vms_patch_builtins()  gcc_unreachable()
218#endif
219
220static unsigned int
221rest_of_handle_trap_shadows (void)
222{
223  alpha_handle_trap_shadows ();
224  return 0;
225}
226
227namespace {
228
229const pass_data pass_data_handle_trap_shadows =
230{
231  RTL_PASS,
232  "trap_shadows",			/* name */
233  OPTGROUP_NONE,			/* optinfo_flags */
234  TV_NONE,				/* tv_id */
235  0,					/* properties_required */
236  0,					/* properties_provided */
237  0,					/* properties_destroyed */
238  0,					/* todo_flags_start */
239  TODO_df_finish,			/* todo_flags_finish */
240};
241
242class pass_handle_trap_shadows : public rtl_opt_pass
243{
244public:
245  pass_handle_trap_shadows(gcc::context *ctxt)
246    : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
247  {}
248
249  /* opt_pass methods: */
250  virtual bool gate (function *)
251    {
252      return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
253    }
254
255  virtual unsigned int execute (function *)
256    {
257      return rest_of_handle_trap_shadows ();
258    }
259
260}; // class pass_handle_trap_shadows
261
262} // anon namespace
263
264rtl_opt_pass *
265make_pass_handle_trap_shadows (gcc::context *ctxt)
266{
267  return new pass_handle_trap_shadows (ctxt);
268}
269
270static unsigned int
271rest_of_align_insns (void)
272{
273  alpha_align_insns ();
274  return 0;
275}
276
277namespace {
278
279const pass_data pass_data_align_insns =
280{
281  RTL_PASS,
282  "align_insns",			/* name */
283  OPTGROUP_NONE,			/* optinfo_flags */
284  TV_NONE,				/* tv_id */
285  0,					/* properties_required */
286  0,					/* properties_provided */
287  0,					/* properties_destroyed */
288  0,					/* todo_flags_start */
289  TODO_df_finish,			/* todo_flags_finish */
290};
291
292class pass_align_insns : public rtl_opt_pass
293{
294public:
295  pass_align_insns(gcc::context *ctxt)
296    : rtl_opt_pass(pass_data_align_insns, ctxt)
297  {}
298
299  /* opt_pass methods: */
300  virtual bool gate (function *)
301    {
302      /* Due to the number of extra trapb insns, don't bother fixing up
303	 alignment when trap precision is instruction.  Moreover, we can
304	 only do our job when sched2 is run.  */
305      return ((alpha_tune == PROCESSOR_EV4
306	       || alpha_tune == PROCESSOR_EV5)
307	      && optimize && !optimize_size
308	      && alpha_tp != ALPHA_TP_INSN
309	      && flag_schedule_insns_after_reload);
310    }
311
312  virtual unsigned int execute (function *)
313    {
314      return rest_of_align_insns ();
315    }
316
317}; // class pass_align_insns
318
319} // anon namespace
320
321rtl_opt_pass *
322make_pass_align_insns (gcc::context *ctxt)
323{
324  return new pass_align_insns (ctxt);
325}
326
327#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
328/* Implement TARGET_MANGLE_TYPE.  */
329
330static const char *
331alpha_mangle_type (const_tree type)
332{
333  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
334      && TARGET_LONG_DOUBLE_128)
335    return "g";
336
337  /* For all other types, use normal C++ mangling.  */
338  return NULL;
339}
340#endif
341
342/* Parse target option strings.  */
343
344static void
345alpha_option_override (void)
346{
347  static const struct cpu_table {
348    const char *const name;
349    const enum processor_type processor;
350    const int flags;
351    const unsigned short line_size; /* in bytes */
352    const unsigned short l1_size;   /* in kb.  */
353    const unsigned short l2_size;   /* in kb.  */
354  } cpu_table[] = {
355    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
356       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
357       had 64k to 8M 8-byte direct Bcache.  */
358    { "ev4",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
359    { "21064",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
360    { "ev45",	PROCESSOR_EV4, 0, 32, 16, 16*1024 },
361
362    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
363       and 1M to 16M 64 byte L3 (not modeled).
       PCA56 had a 16k 64-byte Icache; PCA57 had a 32k Icache.
       PCA56 had an 8k 64-byte Dcache; PCA57 had a 16k Dcache.  */
366    { "ev5",	PROCESSOR_EV5, 0, 32, 8, 96 },
367    { "21164",	PROCESSOR_EV5, 0, 32, 8, 96 },
368    { "ev56",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
369    { "21164a",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
370    { "pca56",	PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
371    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
372    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
373
374    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
375    { "ev6",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
376    { "21264",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
377    { "ev67",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
378      64, 64, 16*1024 },
379    { "21264a",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
380      64, 64, 16*1024 }
381  };
382
383  int const ct_size = ARRAY_SIZE (cpu_table);
384  int line_size = 0, l1_size = 0, l2_size = 0;
385  int i;
386
387#ifdef SUBTARGET_OVERRIDE_OPTIONS
388  SUBTARGET_OVERRIDE_OPTIONS;
389#endif
390
391  /* Default to full IEEE compliance mode for Go language.  */
392  if (strcmp (lang_hooks.name, "GNU Go") == 0
393      && !(target_flags_explicit & MASK_IEEE))
394    target_flags |= MASK_IEEE;
395
396  alpha_fprm = ALPHA_FPRM_NORM;
397  alpha_tp = ALPHA_TP_PROG;
398  alpha_fptm = ALPHA_FPTM_N;
399
400  if (TARGET_IEEE)
401    {
402      alpha_tp = ALPHA_TP_INSN;
403      alpha_fptm = ALPHA_FPTM_SU;
404    }
405  if (TARGET_IEEE_WITH_INEXACT)
406    {
407      alpha_tp = ALPHA_TP_INSN;
408      alpha_fptm = ALPHA_FPTM_SUI;
409    }
410
411  if (alpha_tp_string)
412    {
413      if (! strcmp (alpha_tp_string, "p"))
414	alpha_tp = ALPHA_TP_PROG;
415      else if (! strcmp (alpha_tp_string, "f"))
416	alpha_tp = ALPHA_TP_FUNC;
417      else if (! strcmp (alpha_tp_string, "i"))
418	alpha_tp = ALPHA_TP_INSN;
419      else
420	error ("bad value %qs for %<-mtrap-precision%> switch",
421	       alpha_tp_string);
422    }
423
424  if (alpha_fprm_string)
425    {
426      if (! strcmp (alpha_fprm_string, "n"))
427	alpha_fprm = ALPHA_FPRM_NORM;
428      else if (! strcmp (alpha_fprm_string, "m"))
429	alpha_fprm = ALPHA_FPRM_MINF;
430      else if (! strcmp (alpha_fprm_string, "c"))
431	alpha_fprm = ALPHA_FPRM_CHOP;
432      else if (! strcmp (alpha_fprm_string,"d"))
433	alpha_fprm = ALPHA_FPRM_DYN;
434      else
435	error ("bad value %qs for %<-mfp-rounding-mode%> switch",
436	       alpha_fprm_string);
437    }
438
439  if (alpha_fptm_string)
440    {
441      if (strcmp (alpha_fptm_string, "n") == 0)
442	alpha_fptm = ALPHA_FPTM_N;
443      else if (strcmp (alpha_fptm_string, "u") == 0)
444	alpha_fptm = ALPHA_FPTM_U;
445      else if (strcmp (alpha_fptm_string, "su") == 0)
446	alpha_fptm = ALPHA_FPTM_SU;
447      else if (strcmp (alpha_fptm_string, "sui") == 0)
448	alpha_fptm = ALPHA_FPTM_SUI;
449      else
450	error ("bad value %qs for %<-mfp-trap-mode%> switch",
451	       alpha_fptm_string);
452    }
453
454  if (alpha_cpu_string)
455    {
456      for (i = 0; i < ct_size; i++)
457	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
458	  {
459	    alpha_tune = alpha_cpu = cpu_table[i].processor;
460	    line_size = cpu_table[i].line_size;
461	    l1_size = cpu_table[i].l1_size;
462	    l2_size = cpu_table[i].l2_size;
463	    target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
464	    target_flags |= cpu_table[i].flags;
465	    break;
466	  }
467      if (i == ct_size)
468	error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string);
469    }
470
471  if (alpha_tune_string)
472    {
473      for (i = 0; i < ct_size; i++)
474	if (! strcmp (alpha_tune_string, cpu_table [i].name))
475	  {
476	    alpha_tune = cpu_table[i].processor;
477	    line_size = cpu_table[i].line_size;
478	    l1_size = cpu_table[i].l1_size;
479	    l2_size = cpu_table[i].l2_size;
480	    break;
481	  }
482      if (i == ct_size)
483	error ("bad value %qs for %<-mtune%> switch", alpha_tune_string);
484    }
485
486  if (line_size)
487    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
488			 param_l1_cache_line_size, line_size);
489  if (l1_size)
490    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
491			 param_l1_cache_size, l1_size);
492  if (l2_size)
493    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
494			 param_l2_cache_size, l2_size);
495
496  /* Do some sanity checks on the above options.  */
497
498  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
499      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
500    {
501      warning (0, "fp software completion requires %<-mtrap-precision=i%>");
502      alpha_tp = ALPHA_TP_INSN;
503    }
504
505  if (alpha_cpu == PROCESSOR_EV6)
506    {
507      /* Except for EV6 pass 1 (not released), we always have precise
508	 arithmetic traps.  Which means we can do software completion
509	 without minding trap shadows.  */
510      alpha_tp = ALPHA_TP_PROG;
511    }
512
513  if (TARGET_FLOAT_VAX)
514    {
515      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
516	{
517	  warning (0, "rounding mode not supported for VAX floats");
518	  alpha_fprm = ALPHA_FPRM_NORM;
519	}
520      if (alpha_fptm == ALPHA_FPTM_SUI)
521	{
522	  warning (0, "trap mode not supported for VAX floats");
523	  alpha_fptm = ALPHA_FPTM_SU;
524	}
525      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
526	warning (0, "128-bit %<long double%> not supported for VAX floats");
527      target_flags &= ~MASK_LONG_DOUBLE_128;
528    }
529
530  {
531    char *end;
532    int lat;
533
534    if (!alpha_mlat_string)
535      alpha_mlat_string = "L1";
536
537    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
538	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
539      ;
540    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
541	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
542	     && alpha_mlat_string[2] == '\0')
543      {
544	static int const cache_latency[][4] =
545	{
546	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
547	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
548	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
549	};
550
551	lat = alpha_mlat_string[1] - '0';
552	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
553	  {
554	    warning (0, "L%d cache latency unknown for %s",
555		     lat, alpha_cpu_name[alpha_tune]);
556	    lat = 3;
557	  }
558	else
559	  lat = cache_latency[alpha_tune][lat-1];
560      }
561    else if (! strcmp (alpha_mlat_string, "main"))
562      {
563	/* Most current memories have about 370ns latency.  This is
564	   a reasonable guess for a fast cpu.  */
565	lat = 150;
566      }
567    else
568      {
569	warning (0, "bad value %qs for %<-mmemory-latency%>",
570		 alpha_mlat_string);
571	lat = 3;
572      }
573
574    alpha_memory_latency = lat;
575  }
576
577  /* Default the definition of "small data" to 8 bytes.  */
578  if (!OPTION_SET_P (g_switch_value))
579    g_switch_value = 8;
580
581  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
582  if (flag_pic == 1)
583    target_flags |= MASK_SMALL_DATA;
584  else if (flag_pic == 2)
585    target_flags &= ~MASK_SMALL_DATA;
586
587  alpha_override_options_after_change ();
588
589  /* Register variables and functions with the garbage collector.  */
590
591  /* Set up function hooks.  */
592  init_machine_status = alpha_init_machine_status;
593
594  /* Tell the compiler when we're using VAX floating point.  */
595  if (TARGET_FLOAT_VAX)
596    {
597      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
598      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
599      REAL_MODE_FORMAT (TFmode) = NULL;
600    }
601
602#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
603  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
604    target_flags |= MASK_LONG_DOUBLE_128;
605#endif
606
607}
608
609/* Implement targetm.override_options_after_change.  */
610
611static void
612alpha_override_options_after_change (void)
613{
614  /* Align labels and loops for optimal branching.  */
615  /* ??? Kludge these by not doing anything if we don't optimize.  */
616  if (optimize > 0)
617    {
618      if (flag_align_loops && !str_align_loops)
619	str_align_loops = "16";
620      if (flag_align_jumps && !str_align_jumps)
621	str_align_jumps = "16";
622    }
623  if (flag_align_functions && !str_align_functions)
624    str_align_functions = "16";
625}
626
/* Return 1 if VALUE is a mask in which every byte is either all zeros
   or all ones.  */
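/* For example, zap_mask accepts 0x00000000000000ff, 0xffff0000ffff0000
   and 0 (every byte is 0x00 or 0xff), but rejects 0x0000000000000f00,
   whose second byte is only partially set.  */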
628
629int
630zap_mask (HOST_WIDE_INT value)
631{
632  int i;
633
634  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
635       i++, value >>= 8)
636    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
637      return 0;
638
639  return 1;
640}
641
642/* Return true if OP is valid for a particular TLS relocation.
643   We are already guaranteed that OP is a CONST.  */
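/* The operand being checked is expected to look like

	(const (unspec [(symbol_ref "foo")] UNSPEC_TPREL))

   (or the same shape with UNSPEC_DTPREL); anything else is rejected
   before the TLS model of the symbol is even examined.  */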
644
645int
646tls_symbolic_operand_1 (rtx op, int size, int unspec)
647{
648  op = XEXP (op, 0);
649
650  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
651    return 0;
652  op = XVECEXP (op, 0, 0);
653
654  if (GET_CODE (op) != SYMBOL_REF)
655    return 0;
656
657  switch (SYMBOL_REF_TLS_MODEL (op))
658    {
659    case TLS_MODEL_LOCAL_DYNAMIC:
660      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
661    case TLS_MODEL_INITIAL_EXEC:
662      return unspec == UNSPEC_TPREL && size == 64;
663    case TLS_MODEL_LOCAL_EXEC:
664      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
665    default:
666      gcc_unreachable ();
667    }
668}
669
670/* Used by aligned_memory_operand and unaligned_memory_operand to
671   resolve what reload is going to do with OP if it's a register.  */
672
673rtx
674resolve_reload_operand (rtx op)
675{
676  if (reload_in_progress)
677    {
678      rtx tmp = op;
679      if (SUBREG_P (tmp))
680	tmp = SUBREG_REG (tmp);
681      if (REG_P (tmp)
682	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
683	{
684	  op = reg_equiv_memory_loc (REGNO (tmp));
685	  if (op == 0)
686	    return 0;
687	}
688    }
689  return op;
690}
691
/* The set of scalar modes supported differs from the default
   check-what-c-supports version in that sometimes TFmode is available
   even when long double indicates only DFmode.  */
695
696static bool
697alpha_scalar_mode_supported_p (scalar_mode mode)
698{
699  switch (mode)
700    {
701    case E_QImode:
702    case E_HImode:
703    case E_SImode:
704    case E_DImode:
705    case E_TImode: /* via optabs.cc */
706      return true;
707
708    case E_SFmode:
709    case E_DFmode:
710      return true;
711
712    case E_TFmode:
713      return TARGET_HAS_XFLOATING_LIBS;
714
715    default:
716      return false;
717    }
718}
719
720/* Alpha implements a couple of integer vector mode operations when
721   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
722   which allows the vectorizer to operate on e.g. move instructions,
723   or when expand_vector_operations can do something useful.  */
724
725static bool
726alpha_vector_mode_supported_p (machine_mode mode)
727{
728  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
729}
730
731/* Return the TLS model to use for SYMBOL.  */
732
733static enum tls_model
734tls_symbolic_operand_type (rtx symbol)
735{
736  enum tls_model model;
737
738  if (GET_CODE (symbol) != SYMBOL_REF)
739    return TLS_MODEL_NONE;
740  model = SYMBOL_REF_TLS_MODEL (symbol);
741
742  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
743  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
744    model = TLS_MODEL_INITIAL_EXEC;
745
746  return model;
747}
748
749/* Return true if the function DECL will share the same GP as any
750   function in the current unit of translation.  */
751
752static bool
753decl_has_samegp (const_tree decl)
754{
755  /* Functions that are not local can be overridden, and thus may
756     not share the same gp.  */
757  if (!(*targetm.binds_local_p) (decl))
758    return false;
759
760  /* If -msmall-data is in effect, assume that there is only one GP
761     for the module, and so any local symbol has this property.  We
762     need explicit relocations to be able to enforce this for symbols
763     not defined in this unit of translation, however.  */
764  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
765    return true;
766
767  /* Functions that are not external are defined in this UoT.  */
768  /* ??? Irritatingly, static functions not yet emitted are still
769     marked "external".  Apply this to non-static functions only.  */
770  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
771}
772
773/* Return true if EXP should be placed in the small data section.  */
774
775static bool
776alpha_in_small_data_p (const_tree exp)
777{
778  /* We want to merge strings, so we never consider them small data.  */
779  if (TREE_CODE (exp) == STRING_CST)
780    return false;
781
782  /* Functions are never in the small data area.  Duh.  */
783  if (TREE_CODE (exp) == FUNCTION_DECL)
784    return false;
785
786  /* COMMON symbols are never small data.  */
787  if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
788    return false;
789
790  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
791    {
792      const char *section = DECL_SECTION_NAME (exp);
793      if (strcmp (section, ".sdata") == 0
794	  || strcmp (section, ".sbss") == 0)
795	return true;
796    }
797  else
798    {
799      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
800
801      /* If this is an incomplete type with size 0, then we can't put it
802	 in sdata because it might be too big when completed.  */
803      if (size > 0 && size <= g_switch_value)
804	return true;
805    }
806
807  return false;
808}
809
810#if TARGET_ABI_OPEN_VMS
811static bool
812vms_valid_pointer_mode (scalar_int_mode mode)
813{
814  return (mode == SImode || mode == DImode);
815}
816
817static bool
818alpha_linkage_symbol_p (const char *symname)
819{
820  int symlen = strlen (symname);
821
822  if (symlen > 4)
823    return strcmp (&symname [symlen - 4], "..lk") == 0;
824
825  return false;
826}
827
828#define LINKAGE_SYMBOL_REF_P(X) \
829  ((GET_CODE (X) == SYMBOL_REF   \
830    && alpha_linkage_symbol_p (XSTR (X, 0))) \
831   || (GET_CODE (X) == CONST                 \
832       && GET_CODE (XEXP (X, 0)) == PLUS     \
833       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
834       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
835#endif
836
837/* legitimate_address_p recognizes an RTL expression that is a valid
838   memory address for an instruction.  The MODE argument is the
839   machine mode for the MEM expression that wants to use this address.
840
841   For Alpha, we have either a constant address or the sum of a
842   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
844   low-order three bits; this is an "unaligned" access.  */
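/* For example, for DImode each of the following is accepted:

	(reg:DI 16)
	(plus:DI (reg:DI 16) (const_int 64))
	(and:DI (plus:DI (reg:DI 16) (const_int 64)) (const_int -8))

   the last being the ldq_u style "unaligned" form handled first
   below.  */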
845
846static bool
847alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
848{
849  /* If this is an ldq_u type address, discard the outer AND.  */
850  if (mode == DImode
851      && GET_CODE (x) == AND
852      && CONST_INT_P (XEXP (x, 1))
853      && INTVAL (XEXP (x, 1)) == -8)
854    x = XEXP (x, 0);
855
856  /* Discard non-paradoxical subregs.  */
857  if (SUBREG_P (x)
858      && (GET_MODE_SIZE (GET_MODE (x))
859	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
860    x = SUBREG_REG (x);
861
862  /* Unadorned general registers are valid.  */
863  if (REG_P (x)
864      && (strict
865	  ? STRICT_REG_OK_FOR_BASE_P (x)
866	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
867    return true;
868
869  /* Constant addresses (i.e. +/- 32k) are valid.  */
870  if (CONSTANT_ADDRESS_P (x))
871    return true;
872
873#if TARGET_ABI_OPEN_VMS
874  if (LINKAGE_SYMBOL_REF_P (x))
875    return true;
876#endif
877
878  /* Register plus a small constant offset is valid.  */
879  if (GET_CODE (x) == PLUS)
880    {
881      rtx ofs = XEXP (x, 1);
882      x = XEXP (x, 0);
883
884      /* Discard non-paradoxical subregs.  */
885      if (SUBREG_P (x)
886          && (GET_MODE_SIZE (GET_MODE (x))
887	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
888	x = SUBREG_REG (x);
889
890      if (REG_P (x))
891	{
892	  if (! strict
893	      && NONSTRICT_REG_OK_FP_BASE_P (x)
894	      && CONST_INT_P (ofs))
895	    return true;
896	  if ((strict
897	       ? STRICT_REG_OK_FOR_BASE_P (x)
898	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
899	      && CONSTANT_ADDRESS_P (ofs))
900	    return true;
901	}
902    }
903
904  /* If we're managing explicit relocations, LO_SUM is valid, as are small
905     data symbols.  Avoid explicit relocations of modes larger than word
     mode since e.g. $LC0+8($1) can fold around +/- 32k offset.  */
907  else if (TARGET_EXPLICIT_RELOCS
908	   && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
909    {
910      if (small_symbolic_operand (x, Pmode))
911	return true;
912
913      if (GET_CODE (x) == LO_SUM)
914	{
915	  rtx ofs = XEXP (x, 1);
916	  x = XEXP (x, 0);
917
918	  /* Discard non-paradoxical subregs.  */
919	  if (SUBREG_P (x)
920	      && (GET_MODE_SIZE (GET_MODE (x))
921		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
922	    x = SUBREG_REG (x);
923
924	  /* Must have a valid base register.  */
925	  if (! (REG_P (x)
926		 && (strict
927		     ? STRICT_REG_OK_FOR_BASE_P (x)
928		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
929	    return false;
930
931	  /* The symbol must be local.  */
932	  if (local_symbolic_operand (ofs, Pmode)
933	      || dtp32_symbolic_operand (ofs, Pmode)
934	      || tp32_symbolic_operand (ofs, Pmode))
935	    return true;
936	}
937    }
938
939  return false;
940}
941
942/* Build the SYMBOL_REF for __tls_get_addr.  */
943
944static GTY(()) rtx tls_get_addr_libfunc;
945
946static rtx
947get_tls_get_addr (void)
948{
949  if (!tls_get_addr_libfunc)
950    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
951  return tls_get_addr_libfunc;
952}
953
954/* Try machine-dependent ways of modifying an illegitimate address
955   to be legitimate.  If we find one, return the new, valid address.  */
956
957static rtx
958alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
959{
960  HOST_WIDE_INT addend;
961
962  /* If the address is (plus reg const_int) and the CONST_INT is not a
963     valid offset, compute the high part of the constant and add it to
964     the register.  Then our address is (plus temp low-part-const).  */
965  if (GET_CODE (x) == PLUS
966      && REG_P (XEXP (x, 0))
967      && CONST_INT_P (XEXP (x, 1))
968      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
969    {
970      addend = INTVAL (XEXP (x, 1));
971      x = XEXP (x, 0);
972      goto split_addend;
973    }
974
975  /* If the address is (const (plus FOO const_int)), find the low-order
976     part of the CONST_INT.  Then load FOO plus any high-order part of the
977     CONST_INT into a register.  Our address is (plus reg low-part-const).
978     This is done to reduce the number of GOT entries.  */
979  if (can_create_pseudo_p ()
980      && GET_CODE (x) == CONST
981      && GET_CODE (XEXP (x, 0)) == PLUS
982      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
983    {
984      addend = INTVAL (XEXP (XEXP (x, 0), 1));
985      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
986      goto split_addend;
987    }
988
989  /* If we have a (plus reg const), emit the load as in (2), then add
990     the two registers, and finally generate (plus reg low-part-const) as
991     our address.  */
992  if (can_create_pseudo_p ()
993      && GET_CODE (x) == PLUS
994      && REG_P (XEXP (x, 0))
995      && GET_CODE (XEXP (x, 1)) == CONST
996      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
997      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
998    {
999      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1000      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1001			       XEXP (XEXP (XEXP (x, 1), 0), 0),
1002			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
1003      goto split_addend;
1004    }
1005
1006  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
     Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1008     around +/- 32k offset.  */
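  /* For a local symbol that is not small data, the code below emits
     (set (reg tmp) (high (symbol_ref "sym"))) and returns

	(lo_sum (reg tmp) (symbol_ref "sym"))

     which the move patterns typically assemble as an ldah/lda pair
     using the !gprelhigh and !gprellow relocations.  */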
1009  if (TARGET_EXPLICIT_RELOCS
1010      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1011      && symbolic_operand (x, Pmode))
1012    {
1013      rtx r0, r16, eqv, tga, tp, dest, seq;
1014      rtx_insn *insn;
1015
1016      switch (tls_symbolic_operand_type (x))
1017	{
1018	case TLS_MODEL_NONE:
1019	  break;
1020
1021	case TLS_MODEL_GLOBAL_DYNAMIC:
1022	  {
1023	    start_sequence ();
1024
1025	    r0 = gen_rtx_REG (Pmode, 0);
1026	    r16 = gen_rtx_REG (Pmode, 16);
1027	    tga = get_tls_get_addr ();
1028	    dest = gen_reg_rtx (Pmode);
1029	    seq = GEN_INT (alpha_next_sequence_number++);
1030
1031	    emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1032	    rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
1033	    insn = emit_call_insn (val);
1034	    RTL_CONST_CALL_P (insn) = 1;
1035	    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1036
1037	    insn = get_insns ();
1038	    end_sequence ();
1039
1040	    emit_libcall_block (insn, dest, r0, x);
1041	    return dest;
1042	  }
1043
1044	case TLS_MODEL_LOCAL_DYNAMIC:
1045	  {
1046	    start_sequence ();
1047
1048	    r0 = gen_rtx_REG (Pmode, 0);
1049	    r16 = gen_rtx_REG (Pmode, 16);
1050	    tga = get_tls_get_addr ();
1051	    scratch = gen_reg_rtx (Pmode);
1052	    seq = GEN_INT (alpha_next_sequence_number++);
1053
1054	    emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1055	    rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
1056	    insn = emit_call_insn (val);
1057	    RTL_CONST_CALL_P (insn) = 1;
1058	    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1059
1060	    insn = get_insns ();
1061	    end_sequence ();
1062
1063	    eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1064				  UNSPEC_TLSLDM_CALL);
1065	    emit_libcall_block (insn, scratch, r0, eqv);
1066
1067	    eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1068	    eqv = gen_rtx_CONST (Pmode, eqv);
1069
1070	    if (alpha_tls_size == 64)
1071	      {
1072		dest = gen_reg_rtx (Pmode);
1073		emit_insn (gen_rtx_SET (dest, eqv));
1074		emit_insn (gen_adddi3 (dest, dest, scratch));
1075		return dest;
1076	      }
1077	    if (alpha_tls_size == 32)
1078	      {
1079		rtx temp = gen_rtx_HIGH (Pmode, eqv);
1080		temp = gen_rtx_PLUS (Pmode, scratch, temp);
1081		scratch = gen_reg_rtx (Pmode);
1082		emit_insn (gen_rtx_SET (scratch, temp));
1083	      }
1084	    return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1085	  }
1086
1087	case TLS_MODEL_INITIAL_EXEC:
1088	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1089	  eqv = gen_rtx_CONST (Pmode, eqv);
1090	  tp = gen_reg_rtx (Pmode);
1091	  scratch = gen_reg_rtx (Pmode);
1092	  dest = gen_reg_rtx (Pmode);
1093
1094	  emit_insn (gen_get_thread_pointerdi (tp));
1095	  emit_insn (gen_rtx_SET (scratch, eqv));
1096	  emit_insn (gen_adddi3 (dest, tp, scratch));
1097	  return dest;
1098
1099	case TLS_MODEL_LOCAL_EXEC:
1100	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1101	  eqv = gen_rtx_CONST (Pmode, eqv);
1102	  tp = gen_reg_rtx (Pmode);
1103
1104	  emit_insn (gen_get_thread_pointerdi (tp));
1105	  if (alpha_tls_size == 32)
1106	    {
1107	      rtx temp = gen_rtx_HIGH (Pmode, eqv);
1108	      temp = gen_rtx_PLUS (Pmode, tp, temp);
1109	      tp = gen_reg_rtx (Pmode);
1110	      emit_insn (gen_rtx_SET (tp, temp));
1111	    }
1112	  return gen_rtx_LO_SUM (Pmode, tp, eqv);
1113
1114	default:
1115	  gcc_unreachable ();
1116	}
1117
1118      if (local_symbolic_operand (x, Pmode))
1119	{
1120	  if (small_symbolic_operand (x, Pmode))
1121	    return x;
1122	  else
1123	    {
1124	      if (can_create_pseudo_p ())
1125	        scratch = gen_reg_rtx (Pmode);
1126	      emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1127	      return gen_rtx_LO_SUM (Pmode, scratch, x);
1128	    }
1129	}
1130    }
1131
1132  return NULL;
1133
1134 split_addend:
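  /* For example, an addend of 0x12348765 splits into low = -0x789b
     and high = 0x12350000: the high part is added to X (a single
     ldah), and the low part is returned as the 16-bit displacement,
     since 0x12350000 - 0x789b == 0x12348765.  */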
1135  {
1136    HOST_WIDE_INT low, high;
1137
1138    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1139    addend -= low;
1140    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1141    addend -= high;
1142
1143    if (addend)
1144      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1145			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
1146			       1, OPTAB_LIB_WIDEN);
1147    if (high)
1148      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1149			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
1150			       1, OPTAB_LIB_WIDEN);
1151
1152    return plus_constant (Pmode, x, low);
1153  }
1154}
1155
1156
1157/* Try machine-dependent ways of modifying an illegitimate address
1158   to be legitimate.  Return X or the new, valid address.  */
1159
1160static rtx
1161alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1162			  machine_mode mode)
1163{
1164  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1165  return new_x ? new_x : x;
1166}
1167
1168/* Return true if ADDR has an effect that depends on the machine mode it
1169   is used for.  On the Alpha this is true only for the unaligned modes.
1170   We can simplify the test since we know that the address must be valid.  */
1171
1172static bool
1173alpha_mode_dependent_address_p (const_rtx addr,
1174				addr_space_t as ATTRIBUTE_UNUSED)
1175{
1176  return GET_CODE (addr) == AND;
1177}
1178
1179/* Primarily this is required for TLS symbols, but given that our move
1180   patterns *ought* to be able to handle any symbol at any time, we
1181   should never be spilling symbolic operands to the constant pool, ever.  */
1182
1183static bool
1184alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1185{
1186  enum rtx_code code = GET_CODE (x);
1187  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1188}
1189
1190/* We do not allow indirect calls to be optimized into sibling calls, nor
1191   can we allow a call to a function with a different GP to be optimized
1192   into a sibcall.  */
1193
1194static bool
1195alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1196{
1197  /* Can't do indirect tail calls, since we don't know if the target
1198     uses the same GP.  */
1199  if (!decl)
1200    return false;
1201
1202  /* Otherwise, we can make a tail call if the target function shares
1203     the same GP.  */
1204  return decl_has_samegp (decl);
1205}
1206
1207bool
1208some_small_symbolic_operand_int (rtx x)
1209{
1210  subrtx_var_iterator::array_type array;
1211  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1212    {
1213      rtx x = *iter;
1214      /* Don't re-split.  */
1215      if (GET_CODE (x) == LO_SUM)
1216	iter.skip_subrtxes ();
1217      else if (small_symbolic_operand (x, Pmode))
1218	return true;
1219    }
1220  return false;
1221}
1222
1223rtx
1224split_small_symbolic_operand (rtx x)
1225{
1226  x = copy_insn (x);
1227  subrtx_ptr_iterator::array_type array;
1228  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1229    {
1230      rtx *ptr = *iter;
1231      rtx x = *ptr;
1232      /* Don't re-split.  */
1233      if (GET_CODE (x) == LO_SUM)
1234	iter.skip_subrtxes ();
1235      else if (small_symbolic_operand (x, Pmode))
1236	{
1237	  *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1238	  iter.skip_subrtxes ();
1239	}
1240    }
1241  return x;
1242}
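
/* For example, (mem (symbol_ref "small_var")) becomes
   (mem (lo_sum (reg 29) (symbol_ref "small_var"))), making the use of
   the GP register (pic_offset_table_rtx, $29) explicit in the RTL.  */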
1243
1244/* Indicate that INSN cannot be duplicated.  This is true for any insn
1245   that we've marked with gpdisp relocs, since those have to stay in
1246   1-1 correspondence with one another.
1247
1248   Technically we could copy them if we could set up a mapping from one
1249   sequence number to another, across the set of insns to be duplicated.
1250   This seems overly complicated and error-prone since interblock motion
1251   from sched-ebb could move one of the pair of insns to a different block.
1252
1253   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
1254   then they'll be in a different block from their ldgp.  Which could lead
1255   the bb reorder code to think that it would be ok to copy just the block
1256   containing the call and branch to the block containing the ldgp.  */
1257
1258static bool
1259alpha_cannot_copy_insn_p (rtx_insn *insn)
1260{
1261  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1262    return false;
1263  if (recog_memoized (insn) >= 0)
1264    return get_attr_cannot_copy (insn);
1265  else
1266    return false;
1267}
1268
1269
1270/* Try a machine-dependent way of reloading an illegitimate address
1271   operand.  If we find one, push the reload and return the new rtx.  */
1272
1273rtx
1274alpha_legitimize_reload_address (rtx x,
1275				 machine_mode mode ATTRIBUTE_UNUSED,
1276				 int opnum, int type,
1277				 int ind_levels ATTRIBUTE_UNUSED)
1278{
1279  /* We must recognize output that we have already generated ourselves.  */
1280  if (GET_CODE (x) == PLUS
1281      && GET_CODE (XEXP (x, 0)) == PLUS
1282      && REG_P (XEXP (XEXP (x, 0), 0))
1283      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1284      && CONST_INT_P (XEXP (x, 1)))
1285    {
1286      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1287		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1288		   opnum, (enum reload_type) type);
1289      return x;
1290    }
1291
1292  /* We wish to handle large displacements off a base register by
1293     splitting the addend across an ldah and the mem insn.  This
     cuts the number of extra insns needed from 3 to 1.  */
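  /* For example, a displacement of 0x12345678 is split into
     high = 0x12340000, reloaded into a base register with a single
     ldah, and low = 0x5678, which stays in the displacement field of
     the memory reference itself.  */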
1295  if (GET_CODE (x) == PLUS
1296      && REG_P (XEXP (x, 0))
1297      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1298      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1299      && CONST_INT_P (XEXP (x, 1)))
1300    {
1301      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1302      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1303      HOST_WIDE_INT high
1304	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1305
1306      /* Check for 32-bit overflow.  */
1307      if (high + low != val)
1308	return NULL_RTX;
1309
1310      /* Reload the high part into a base reg; leave the low part
1311	 in the mem directly.  */
1312      x = gen_rtx_PLUS (GET_MODE (x),
1313			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1314				      GEN_INT (high)),
1315			GEN_INT (low));
1316
1317      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1318		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1319		   opnum, (enum reload_type) type);
1320      return x;
1321    }
1322
1323  return NULL_RTX;
1324}
1325
1326/* Return the cost of moving between registers of various classes.  Moving
1327   between FLOAT_REGS and anything else except float regs is expensive.
1328   In fact, we make it quite expensive because we really don't want to
1329   do these moves unless it is clearly worth it.  Optimizations may
1330   reduce the impact of not being able to allocate a pseudo to a
1331   hard register.  */
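/* For example, with the default alpha_memory_latency of 3 and no FIX
   extension, a move between a general register and a floating-point
   register is costed at 4 + 2*3 = 10, versus 2 for a move within
   either class.  */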
1332
1333static int
1334alpha_register_move_cost (machine_mode /*mode*/,
1335			  reg_class_t from, reg_class_t to)
1336{
1337  if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
1338    return 2;
1339
1340  if (TARGET_FIX)
1341    return (from == FLOAT_REGS) ? 6 : 8;
1342
1343  return 4 + 2 * alpha_memory_latency;
1344}
1345
1346/* Return the cost of moving data of MODE from a register to
1347   or from memory.  On the Alpha, bump this up a bit.  */
1348
1349static int
1350alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
1351			bool /*in*/)
1352{
1353  return 2 * alpha_memory_latency;
1354}
1355
1356/* Compute a (partial) cost for rtx X.  Return true if the complete
1357   cost has been computed, and false if subexpressions should be
1358   scanned.  In either case, *TOTAL contains the cost result.  */
1359
1360static bool
1361alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
1362		 bool speed)
1363{
1364  int code = GET_CODE (x);
1365  bool float_mode_p = FLOAT_MODE_P (mode);
1366  const struct alpha_rtx_cost_data *cost_data;
1367
1368  if (!speed)
1369    cost_data = &alpha_rtx_cost_size;
1370  else
1371    cost_data = &alpha_rtx_cost_data[alpha_tune];
1372
1373  switch (code)
1374    {
1375    case CONST_INT:
1376      /* If this is an 8-bit constant, return zero since it can be used
1377	 nearly anywhere with no cost.  If it is a valid operand for an
1378	 ADD or AND, likewise return 0 if we know it will be used in that
1379	 context.  Otherwise, return 2 since it might be used there later.
1380	 All other constants take at least two insns.  */
1381      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1382	{
1383	  *total = 0;
1384	  return true;
1385	}
1386      /* FALLTHRU */
1387
1388    case CONST_DOUBLE:
1389    case CONST_WIDE_INT:
1390      if (x == CONST0_RTX (mode))
1391	*total = 0;
1392      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1393	       || (outer_code == AND && and_operand (x, VOIDmode)))
1394	*total = 0;
1395      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1396	*total = 2;
1397      else
1398	*total = COSTS_N_INSNS (2);
1399      return true;
1400
1401    case CONST:
1402    case SYMBOL_REF:
1403    case LABEL_REF:
1404      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1405	*total = COSTS_N_INSNS (outer_code != MEM);
1406      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1407	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
1408      else if (tls_symbolic_operand_type (x))
1409	/* Estimate of cost for call_pal rduniq.  */
1410	/* ??? How many insns do we emit here?  More than one...  */
1411	*total = COSTS_N_INSNS (15);
1412      else
1413	/* Otherwise we do a load from the GOT.  */
1414	*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1415      return true;
1416
1417    case HIGH:
1418      /* This is effectively an add_operand.  */
1419      *total = 2;
1420      return true;
1421
1422    case PLUS:
1423    case MINUS:
1424      if (float_mode_p)
1425	*total = cost_data->fp_add;
1426      else if (GET_CODE (XEXP (x, 0)) == ASHIFT
1427	       && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1428	{
1429	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
1430			      (enum rtx_code) outer_code, opno, speed)
1431		    + rtx_cost (XEXP (x, 1), mode,
1432				(enum rtx_code) outer_code, opno, speed)
1433		    + COSTS_N_INSNS (1));
1434	  return true;
1435	}
1436      return false;
1437
1438    case MULT:
1439      if (float_mode_p)
1440	*total = cost_data->fp_mult;
1441      else if (mode == DImode)
1442	*total = cost_data->int_mult_di;
1443      else
1444	*total = cost_data->int_mult_si;
1445      return false;
1446
1447    case ASHIFT:
1448      if (CONST_INT_P (XEXP (x, 1))
1449	  && INTVAL (XEXP (x, 1)) <= 3)
1450	{
1451	  *total = COSTS_N_INSNS (1);
1452	  return false;
1453	}
1454      /* FALLTHRU */
1455
1456    case ASHIFTRT:
1457    case LSHIFTRT:
1458      *total = cost_data->int_shift;
1459      return false;
1460
1461    case IF_THEN_ELSE:
1462      if (float_mode_p)
1463        *total = cost_data->fp_add;
1464      else
1465        *total = cost_data->int_cmov;
1466      return false;
1467
1468    case DIV:
1469    case UDIV:
1470    case MOD:
1471    case UMOD:
1472      if (!float_mode_p)
1473	*total = cost_data->int_div;
1474      else if (mode == SFmode)
1475        *total = cost_data->fp_div_sf;
1476      else
1477        *total = cost_data->fp_div_df;
1478      return false;
1479
1480    case MEM:
1481      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1482      return true;
1483
1484    case NEG:
1485      if (! float_mode_p)
1486	{
1487	  *total = COSTS_N_INSNS (1);
1488	  return false;
1489	}
1490      /* FALLTHRU */
1491
1492    case ABS:
1493      if (! float_mode_p)
1494	{
1495	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1496	  return false;
1497	}
1498      /* FALLTHRU */
1499
1500    case FLOAT:
1501    case UNSIGNED_FLOAT:
1502    case FIX:
1503    case UNSIGNED_FIX:
1504    case FLOAT_TRUNCATE:
1505      *total = cost_data->fp_add;
1506      return false;
1507
1508    case FLOAT_EXTEND:
1509      if (MEM_P (XEXP (x, 0)))
1510	*total = 0;
1511      else
1512	*total = cost_data->fp_add;
1513      return false;
1514
1515    default:
1516      return false;
1517    }
1518}
1519
/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  */
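/* For example, an HImode reference to 6($16) with no useful MEM_ALIGN
   information yields the aligned SImode word at 4($16) and a bit
   offset of 16; the halfword is then extracted from or inserted into
   that word with shift/mask sequences.  */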
1524
1525void
1526get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1527{
1528  rtx base;
1529  HOST_WIDE_INT disp, offset;
1530
1531  gcc_assert (MEM_P (ref));
1532
1533  if (reload_in_progress)
1534    {
1535      base = find_replacement (&XEXP (ref, 0));
1536      gcc_assert (memory_address_p (GET_MODE (ref), base));
1537    }
1538  else
1539    base = XEXP (ref, 0);
1540
1541  if (GET_CODE (base) == PLUS)
1542    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1543  else
1544    disp = 0;
1545
1546  /* Find the byte offset within an aligned word.  If the memory itself is
1547     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
1548     will have examined the base register and determined it is aligned, and
1549     thus displacements from it are naturally alignable.  */
1550  if (MEM_ALIGN (ref) >= 32)
1551    offset = 0;
1552  else
1553    offset = disp & 3;
1554
1555  /* The location should not cross aligned word boundary.  */
1556  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1557	      <= GET_MODE_SIZE (SImode));
1558
1559  /* Access the entire aligned word.  */
1560  *paligned_mem = widen_memory_access (ref, SImode, -offset);
1561
1562  /* Convert the byte offset within the word to a bit offset.  */
1563  offset *= BITS_PER_UNIT;
1564  *pbitnum = GEN_INT (offset);
1565}
1566
/* Similar, but just get the address.  Handle the two reload cases.  */
1569
1570rtx
1571get_unaligned_address (rtx ref)
1572{
1573  rtx base;
1574  HOST_WIDE_INT offset = 0;
1575
1576  gcc_assert (MEM_P (ref));
1577
1578  if (reload_in_progress)
1579    {
1580      base = find_replacement (&XEXP (ref, 0));
1581      gcc_assert (memory_address_p (GET_MODE (ref), base));
1582    }
1583  else
1584    base = XEXP (ref, 0);
1585
1586  if (GET_CODE (base) == PLUS)
1587    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1588
1589  return plus_constant (Pmode, base, offset);
1590}
1591
1592/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1593   X is always returned in a register.  */
1594
1595rtx
1596get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1597{
1598  if (GET_CODE (addr) == PLUS)
1599    {
1600      ofs += INTVAL (XEXP (addr, 1));
1601      addr = XEXP (addr, 0);
1602    }
1603
1604  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1605			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1606}
1607
1608/* On the Alpha, all (non-symbolic) constants except zero go into
1609   a floating-point register via memory.  Note that we cannot
1610   return anything that is not a subset of RCLASS, and that some
1611   symbolic constants cannot be dropped to memory.  */
1612
1613enum reg_class
1614alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1615{
1616  /* Zero is present in any register class.  */
1617  if (x == CONST0_RTX (GET_MODE (x)))
1618    return rclass;
1619
1620  /* These sorts of constants we can easily drop to memory.  */
1621  if (CONST_SCALAR_INT_P (x)
1622      || CONST_DOUBLE_P (x)
1623      || GET_CODE (x) == CONST_VECTOR)
1624    {
1625      if (rclass == FLOAT_REGS)
1626	return NO_REGS;
1627      if (rclass == ALL_REGS)
1628	return GENERAL_REGS;
1629      return rclass;
1630    }
1631
1632  /* All other kinds of constants should not (and in the case of HIGH
1633     cannot) be dropped to memory -- instead we use a GENERAL_REGS
1634     secondary reload.  */
1635  if (CONSTANT_P (x))
1636    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1637
1638  return rclass;
1639}
1640
1641/* Inform reload about cases where moving X with a mode MODE to a register in
1642   RCLASS requires an extra scratch or immediate register.  Return the class
1643   needed for the immediate register.  */
1644
1645static reg_class_t
1646alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1647			machine_mode mode, secondary_reload_info *sri)
1648{
1649  enum reg_class rclass = (enum reg_class) rclass_i;
1650
1651  /* Loading and storing HImode or QImode values to and from memory
1652     usually requires a scratch register.  */
1653  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1654    {
1655      if (any_memory_operand (x, mode))
1656	{
1657	  if (in_p)
1658	    {
1659	      if (!aligned_memory_operand (x, mode))
1660		sri->icode = direct_optab_handler (reload_in_optab, mode);
1661	    }
1662	  else
1663	    sri->icode = direct_optab_handler (reload_out_optab, mode);
1664	  return NO_REGS;
1665	}
1666    }
1667
1668  /* We also cannot do integral arithmetic into FP regs, as might result
1669     from register elimination into a DImode fp register.  */
1670  if (rclass == FLOAT_REGS)
1671    {
1672      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1673	return GENERAL_REGS;
1674      if (in_p && INTEGRAL_MODE_P (mode)
1675	  && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1676	return GENERAL_REGS;
1677    }
1678
1679  return NO_REGS;
1680}
1681
1682/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
1683
1684   If we are copying between general and FP registers, we need a memory
1685   location unless the FIX extension is available.  */
1686
1687static bool
1688alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
1689			       reg_class_t class2)
1690{
1691  return (!TARGET_FIX
1692	  && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS)
1693	      || (class2 == FLOAT_REGS && class1 != FLOAT_REGS)));
1694}
1695
1696/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  If MODE is
1697   floating-point, use it.  Otherwise, widen to a word like the default.
1698   This is needed because we always store integers in FP registers in
1699   quadword format.  This whole area is very tricky!  */
1700
1701static machine_mode
1702alpha_secondary_memory_needed_mode (machine_mode mode)
1703{
1704  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1705    return mode;
1706  if (GET_MODE_SIZE (mode) >= 4)
1707    return mode;
1708  return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
1709}
1710
1711/* Given SEQ, which is an INSN list, look for any MEMs in either
1712   a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1713   volatile flags from REF into each of the MEMs found.  If REF is not
1714   a MEM, don't do anything.  */
1715
1716void
1717alpha_set_memflags (rtx seq, rtx ref)
1718{
1719  rtx_insn *insn;
1720
1721  if (!MEM_P (ref))
1722    return;
1723
1724  /* This is only called from alpha.md, after having had something
1725     generated from one of the insn patterns.  So if everything is
1726     zero, the pattern is already up-to-date.  */
1727  if (!MEM_VOLATILE_P (ref)
1728      && !MEM_NOTRAP_P (ref)
1729      && !MEM_READONLY_P (ref))
1730    return;
1731
1732  subrtx_var_iterator::array_type array;
1733  for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1734    if (INSN_P (insn))
1735      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1736	{
1737	  rtx x = *iter;
1738	  if (MEM_P (x))
1739	    {
1740	      MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1741	      MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1742	      MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1743	      /* Sadly, we cannot use alias sets because the extra
1744		 aliasing produced by the AND interferes.  Given that
1745		 two-byte quantities are the only thing we would be
1746		 able to differentiate anyway, there does not seem to
1747		 be any point in convoluting the early out of the
1748		 alias check.  */
1749	      iter.skip_subrtxes ();
1750	    }
1751	}
1752    else
1753      gcc_unreachable ();
1754}
1755
1756static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1757				 int, bool);
1758
/* Internal routine for alpha_emit_set_const to check for N or fewer insns.
1760   If NO_OUTPUT is true, then we only check to see if N insns are possible,
1761   and return pc_rtx if successful.  */
1762
1763static rtx
1764alpha_emit_set_const_1 (rtx target, machine_mode mode,
1765			HOST_WIDE_INT c, int n, bool no_output)
1766{
1767  HOST_WIDE_INT new_const;
1768  int i, bits;
1769  /* Use a pseudo if highly optimizing and still generating RTL.  */
1770  rtx subtarget
1771    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1772  rtx temp, insn;
1773
1774  /* If this is a sign-extended 32-bit constant, we can do this in at most
1775     three insns, so do it if we have enough insns left.  */
1776
1777  if (c >> 31 == -1 || c >> 31 == 0)
1778    {
1779      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1780      HOST_WIDE_INT tmp1 = c - low;
1781      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1782      HOST_WIDE_INT extra = 0;
1783
1784      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldah insns.  */
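      /* For example, c = 0x7fff8000 gives low = -0x8000 and an initial
	 high of -0x8000, which a single ldah would sign-extend to
	 0xffffffff80000000.  With the adjustment we instead emit two
	 ldah insns of 0x4000 << 16 each (extra = high = 0x4000) plus
	 an lda of -0x8000, which reconstructs 0x7fff8000 exactly.  */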
1786
1787      if ((high & 0x8000) != 0 && c >= 0)
1788	{
1789	  extra = 0x4000;
1790	  tmp1 -= 0x40000000;
1791	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1792	}
1793
1794      if (c == low || (low == 0 && extra == 0))
1795	{
	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
	     but that meant that we couldn't handle INT_MIN on 32-bit machines
	     (like NT/Alpha), because we recursed indefinitely through
	     emit_move_insn to gen_movdi.  So instead, since we know exactly
	     what we want, create it explicitly.  */
1801
1802	  if (no_output)
1803	    return pc_rtx;
1804	  if (target == NULL)
1805	    target = gen_reg_rtx (mode);
1806	  emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1807	  return target;
1808	}
1809      else if (n >= 2 + (extra != 0))
1810	{
1811	  if (no_output)
1812	    return pc_rtx;
1813	  if (!can_create_pseudo_p ())
1814	    {
1815	      emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1816	      temp = target;
1817	    }
1818	  else
1819	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
1820					  subtarget, mode);
1821
1822	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
1823	     This means that if we go through expand_binop, we'll try to
1824	     generate extensions, etc, which will require new pseudos, which
1825	     will fail during some split phases.  The SImode add patterns
1826	     still exist, but are not named.  So build the insns by hand.  */
1827
1828	  if (extra != 0)
1829	    {
1830	      if (! subtarget)
1831		subtarget = gen_reg_rtx (mode);
1832	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1833	      insn = gen_rtx_SET (subtarget, insn);
1834	      emit_insn (insn);
1835	      temp = subtarget;
1836	    }
1837
1838	  if (target == NULL)
1839	    target = gen_reg_rtx (mode);
1840	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1841	  insn = gen_rtx_SET (target, insn);
1842	  emit_insn (insn);
1843	  return target;
1844	}
1845    }
1846
1847  /* If we couldn't do it that way, try some other methods.  But if we have
1848     no instructions left, don't bother.  Likewise, if this is SImode and
1849     we can't make pseudos, we can't do anything since the expand_binop
1850     and expand_unop calls will widen and try to make pseudos.  */
1851
1852  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1853    return 0;
1854
  /* Next, see if we can load a related constant and then shift and possibly
     negate it to get the constant we want.  Try this once for each increasing
     number of insns.  */
1858
1859  for (i = 1; i < n; i++)
1860    {
      /* First, see if, minus some low bits, we have an easy load of
	 the high bits.  */
1863
1864      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1865      if (new_const != 0)
1866	{
1867          temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1868	  if (temp)
1869	    {
1870	      if (no_output)
1871		return temp;
1872	      return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1873				   target, 0, OPTAB_WIDEN);
1874	    }
1875	}
1876
1877      /* Next try complementing.  */
1878      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1879      if (temp)
1880	{
1881	  if (no_output)
1882	    return temp;
1883	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1884	}
1885
1886      /* Next try to form a constant and do a left shift.  We can do this
1887	 if some low-order bits are zero; the exact_log2 call below tells
1888	 us that information.  The bits we are shifting out could be any
1889	 value, but here we'll just try the 0- and sign-extended forms of
1890	 the constant.  To try to increase the chance of having the same
1891	 constant in more than one insn, start at the highest number of
1892	 bits to shift, but try all possibilities in case a ZAPNOT will
1893	 be useful.  */
1894
1895      bits = exact_log2 (c & -c);
1896      if (bits > 0)
1897	for (; bits > 0; bits--)
1898	  {
1899	    new_const = c >> bits;
1900	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1901	    if (!temp && c < 0)
1902	      {
1903		new_const = (unsigned HOST_WIDE_INT)c >> bits;
1904		temp = alpha_emit_set_const (subtarget, mode, new_const,
1905					     i, no_output);
1906	      }
1907	    if (temp)
1908	      {
1909		if (no_output)
1910		  return temp;
1911	        return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1912				     target, 0, OPTAB_WIDEN);
1913	      }
1914	  }
1915
1916      /* Now try high-order zero bits.  Here we try the shifted-in bits as
1917	 all zero and all ones.  Be careful to avoid shifting outside the
1918	 mode and to avoid shifting outside the host wide int size.  */
1919
1920      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1921	      - floor_log2 (c) - 1);
1922      if (bits > 0)
1923	for (; bits > 0; bits--)
1924	  {
1925	    new_const = c << bits;
1926	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1927	    if (!temp)
1928	      {
1929		new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1930	        temp = alpha_emit_set_const (subtarget, mode, new_const,
1931					     i, no_output);
1932	      }
1933	    if (temp)
1934	      {
1935		if (no_output)
1936		  return temp;
1937		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1938				     target, 1, OPTAB_WIDEN);
1939	      }
1940	  }
1941
1942      /* Now try high-order 1 bits.  We get that with a sign-extension.
1943	 But one bit isn't enough here.  Be careful to avoid shifting outside
1944	 the mode and to avoid shifting outside the host wide int size.  */
1945
1946      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1947	      - floor_log2 (~ c) - 2);
1948      if (bits > 0)
1949	for (; bits > 0; bits--)
1950	  {
1951	    new_const = c << bits;
1952	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1953	    if (!temp)
1954	      {
1955		new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1956	        temp = alpha_emit_set_const (subtarget, mode, new_const,
1957					     i, no_output);
1958	      }
1959	    if (temp)
1960	      {
1961		if (no_output)
1962		  return temp;
1963		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1964				     target, 0, OPTAB_WIDEN);
1965	      }
1966	  }
1967    }
1968
  /* Finally, see if we can load a value into the target that is the same as
     the constant except that all bytes that are 0 are changed to be 0xff.
     If we can, then we can do a ZAPNOT to obtain the desired constant.  */
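
  /* Worked example (illustrative): for c = 0x1200340000560078 the loop
     below produces new_const = 0x12ff34ffff56ff78, and the mask
     c | ~new_const has 0xff exactly in the originally nonzero bytes, so
     the final AND (a zapnot) recovers c.  */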
1972
1973  new_const = c;
1974  for (i = 0; i < 64; i += 8)
1975    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1976      new_const |= (HOST_WIDE_INT) 0xff << i;
1977
1978  /* We are only called for SImode and DImode.  If this is SImode, ensure that
1979     we are sign extended to a full word.  */
1980
1981  if (mode == SImode)
1982    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1983
1984  if (new_const != c)
1985    {
1986      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1987      if (temp)
1988	{
1989	  if (no_output)
1990	    return temp;
1991	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1992			       target, 0, OPTAB_WIDEN);
1993	}
1994    }
1995
1996  return 0;
1997}
1998
/* Try to output insns to set TARGET equal to the constant C if it can be
   done in N or fewer insns.  Do all computations in MODE.  Returns the place
   where the output has been placed if it can be done and the insns have been
   emitted.  If it would take more than N insns, zero is returned and no
   insns are emitted.  */
2004
2005static rtx
2006alpha_emit_set_const (rtx target, machine_mode mode,
2007		      HOST_WIDE_INT c, int n, bool no_output)
2008{
2009  machine_mode orig_mode = mode;
2010  rtx orig_target = target;
2011  rtx result = 0;
2012  int i;
2013
  /* If we can't make any pseudos, TARGET is an SImode hard register, and we
     can't load this constant in one insn, do this in DImode.  */
2016  if (!can_create_pseudo_p () && mode == SImode
2017      && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2018    {
2019      result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2020      if (result)
2021	return result;
2022
2023      target = no_output ? NULL : gen_lowpart (DImode, target);
2024      mode = DImode;
2025    }
2026  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2027    {
2028      target = no_output ? NULL : gen_lowpart (DImode, target);
2029      mode = DImode;
2030    }
2031
2032  /* Try 1 insn, then 2, then up to N.  */
2033  for (i = 1; i <= n; i++)
2034    {
2035      result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2036      if (result)
2037	{
2038	  rtx_insn *insn;
2039	  rtx set;
2040
2041	  if (no_output)
2042	    return result;
2043
2044	  insn = get_last_insn ();
2045	  set = single_set (insn);
2046	  if (! CONSTANT_P (SET_SRC (set)))
2047	    set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2048	  break;
2049	}
2050    }
2051
2052  /* Allow for the case where we changed the mode of TARGET.  */
2053  if (result)
2054    {
2055      if (result == target)
2056	result = orig_target;
2057      else if (mode != orig_mode)
2058	result = gen_lowpart (orig_mode, result);
2059    }
2060
2061  return result;
2062}
2063
/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
   fall back to a straightforward decomposition.  We do this to avoid
   exponential run times encountered when looking for longer sequences
   with alpha_emit_set_const.  */
2068
2069static rtx
2070alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2071{
2072  HOST_WIDE_INT d1, d2, d3, d4;
2073
  /* Decompose the entire word.  */
2075
2076  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2077  c1 -= d1;
2078  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2079  c1 = (c1 - d2) >> 32;
2080  d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2081  c1 -= d3;
2082  d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2083  gcc_assert (c1 == d4);
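
  /* Worked example (illustrative): c1 = 0x1234567887654321 decomposes
     into d1 = 0x4321, d2 = -0x789b0000 (0x87650000 sign-extended),
     d3 = 0x5679 and d4 = 0x12340000; building d4 + d3, shifting left
     by 32 and adding d2 and d1 reconstructs the original value.  */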
2084
  /* Construct the high word.  */
2086  if (d4)
2087    {
2088      emit_move_insn (target, GEN_INT (d4));
2089      if (d3)
2090	emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2091    }
2092  else
2093    emit_move_insn (target, GEN_INT (d3));
2094
  /* Shift it into place.  */
2096  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2097
2098  /* Add in the low bits.  */
2099  if (d2)
2100    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2101  if (d1)
2102    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2103
2104  return target;
2105}
2106
2107/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits.  */
2108
2109static HOST_WIDE_INT
2110alpha_extract_integer (rtx x)
2111{
2112  if (GET_CODE (x) == CONST_VECTOR)
2113    x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2114
2115  gcc_assert (CONST_INT_P (x));
2116
2117  return INTVAL (x);
2118}
2119
2120/* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for which
2121   we are willing to load the value into a register via a move pattern.
2122   Normally this is all symbolic constants, integral constants that
2123   take three or fewer instructions, and floating-point zero.  */
2124
2125bool
2126alpha_legitimate_constant_p (machine_mode mode, rtx x)
2127{
2128  HOST_WIDE_INT i0;
2129
2130  switch (GET_CODE (x))
2131    {
2132    case LABEL_REF:
2133    case HIGH:
2134      return true;
2135
2136    case CONST:
2137      if (GET_CODE (XEXP (x, 0)) == PLUS
2138	  && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2139	x = XEXP (XEXP (x, 0), 0);
2140      else
2141	return true;
2142
2143      if (GET_CODE (x) != SYMBOL_REF)
2144	return true;
2145      /* FALLTHRU */
2146
2147    case SYMBOL_REF:
2148      /* TLS symbols are never valid.  */
2149      return SYMBOL_REF_TLS_MODEL (x) == 0;
2150
2151    case CONST_WIDE_INT:
2152      if (TARGET_BUILD_CONSTANTS)
2153	return true;
2154      if (x == CONST0_RTX (mode))
2155	return true;
2156      mode = DImode;
2157      gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2158      i0 = CONST_WIDE_INT_ELT (x, 1);
2159      if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2160	return false;
2161      i0 = CONST_WIDE_INT_ELT (x, 0);
2162      goto do_integer;
2163
2164    case CONST_DOUBLE:
2165      if (x == CONST0_RTX (mode))
2166	return true;
2167      return false;
2168
2169    case CONST_VECTOR:
2170      if (x == CONST0_RTX (mode))
2171	return true;
2172      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2173	return false;
2174      if (GET_MODE_SIZE (mode) != 8)
2175	return false;
2176      /* FALLTHRU */
2177
2178    case CONST_INT:
2179      if (TARGET_BUILD_CONSTANTS)
2180	return true;
2181      i0 = alpha_extract_integer (x);
2182    do_integer:
2183      return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2184
2185    default:
2186      return false;
2187    }
2188}
2189
2190/* Operand 1 is known to be a constant, and should require more than one
2191   instruction to load.  Emit that multi-part load.  */
2192
2193bool
2194alpha_split_const_mov (machine_mode mode, rtx *operands)
2195{
2196  HOST_WIDE_INT i0;
2197  rtx temp = NULL_RTX;
2198
2199  i0 = alpha_extract_integer (operands[1]);
2200
2201  temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2202
2203  if (!temp && TARGET_BUILD_CONSTANTS)
2204    temp = alpha_emit_set_long_const (operands[0], i0);
2205
2206  if (temp)
2207    {
2208      if (!rtx_equal_p (operands[0], temp))
2209	emit_move_insn (operands[0], temp);
2210      return true;
2211    }
2212
2213  return false;
2214}
2215
2216/* Expand a move instruction; return true if all work is done.
2217   We don't handle non-bwx subword loads here.  */
2218
2219bool
2220alpha_expand_mov (machine_mode mode, rtx *operands)
2221{
2222  rtx tmp;
2223
2224  /* If the output is not a register, the input must be.  */
2225  if (MEM_P (operands[0])
2226      && ! reg_or_0_operand (operands[1], mode))
2227    operands[1] = force_reg (mode, operands[1]);
2228
2229  /* Allow legitimize_address to perform some simplifications.  */
2230  if (mode == Pmode && symbolic_operand (operands[1], mode))
2231    {
2232      tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2233      if (tmp)
2234	{
2235	  if (tmp == operands[0])
2236	    return true;
2237	  operands[1] = tmp;
2238	  return false;
2239	}
2240    }
2241
2242  /* Early out for non-constants and valid constants.  */
2243  if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2244    return false;
2245
2246  /* Split large integers.  */
2247  if (CONST_INT_P (operands[1])
2248      || GET_CODE (operands[1]) == CONST_VECTOR)
2249    {
2250      if (alpha_split_const_mov (mode, operands))
2251	return true;
2252    }
2253
2254  /* Otherwise we've nothing left but to drop the thing to memory.  */
2255  tmp = force_const_mem (mode, operands[1]);
2256
2257  if (tmp == NULL_RTX)
2258    return false;
2259
2260  if (reload_in_progress)
2261    {
2262      emit_move_insn (operands[0], XEXP (tmp, 0));
2263      operands[1] = replace_equiv_address (tmp, operands[0]);
2264    }
2265  else
2266    operands[1] = validize_mem (tmp);
2267  return false;
2268}
2269
2270/* Expand a non-bwx QImode or HImode move instruction;
2271   return true if all work is done.  */
2272
2273bool
2274alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2275{
2276  rtx seq;
2277
2278  /* If the output is not a register, the input must be.  */
2279  if (MEM_P (operands[0]))
2280    operands[1] = force_reg (mode, operands[1]);
2281
2282  /* Handle four memory cases, unaligned and aligned for either the input
2283     or the output.  The only case where we can be called during reload is
2284     for aligned loads; all other cases require temporaries.  */
2285
2286  if (any_memory_operand (operands[1], mode))
2287    {
2288      if (aligned_memory_operand (operands[1], mode))
2289	{
2290	  if (reload_in_progress)
2291	    {
2292	      seq = gen_reload_in_aligned (mode, operands[0], operands[1]);
2293	      emit_insn (seq);
2294	    }
2295	  else
2296	    {
2297	      rtx aligned_mem, bitnum;
2298	      rtx scratch = gen_reg_rtx (SImode);
2299	      rtx subtarget;
2300	      bool copyout;
2301
2302	      get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2303
2304	      subtarget = operands[0];
2305	      if (REG_P (subtarget))
2306		subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2307	      else
2308		subtarget = gen_reg_rtx (DImode), copyout = true;
2309
2310	      if (mode == QImode)
2311		seq = gen_aligned_loadqi (subtarget, aligned_mem,
2312					  bitnum, scratch);
2313	      else
2314		seq = gen_aligned_loadhi (subtarget, aligned_mem,
2315					  bitnum, scratch);
2316	      emit_insn (seq);
2317
2318	      if (copyout)
2319		emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2320	    }
2321	}
2322      else
2323	{
2324	  /* Don't pass these as parameters since that makes the generated
2325	     code depend on parameter evaluation order which will cause
2326	     bootstrap failures.  */
2327
2328	  rtx temp1, temp2, subtarget, ua;
2329	  bool copyout;
2330
2331	  temp1 = gen_reg_rtx (DImode);
2332	  temp2 = gen_reg_rtx (DImode);
2333
2334	  subtarget = operands[0];
2335	  if (REG_P (subtarget))
2336	    subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2337	  else
2338	    subtarget = gen_reg_rtx (DImode), copyout = true;
2339
2340	  ua = get_unaligned_address (operands[1]);
2341	  if (mode == QImode)
2342	    seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2343	  else
2344	    seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2345
2346	  alpha_set_memflags (seq, operands[1]);
2347	  emit_insn (seq);
2348
2349	  if (copyout)
2350	    emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2351	}
2352      return true;
2353    }
2354
2355  if (any_memory_operand (operands[0], mode))
2356    {
2357      if (aligned_memory_operand (operands[0], mode))
2358	{
2359	  rtx aligned_mem, bitnum;
2360	  rtx temp1 = gen_reg_rtx (SImode);
2361	  rtx temp2 = gen_reg_rtx (SImode);
2362
2363	  get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2364
2365	  emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2366					temp1, temp2));
2367	}
2368      else
2369	{
2370	  rtx temp1 = gen_reg_rtx (DImode);
2371	  rtx temp2 = gen_reg_rtx (DImode);
2372	  rtx temp3 = gen_reg_rtx (DImode);
2373	  rtx ua = get_unaligned_address (operands[0]);
2374
2375	  seq = gen_unaligned_store
2376	    (mode, ua, operands[1], temp1, temp2, temp3);
2377
2378	  alpha_set_memflags (seq, operands[0]);
2379	  emit_insn (seq);
2380	}
2381      return true;
2382    }
2383
2384  return false;
2385}
2386
2387/* Implement the movmisalign patterns.  One of the operands is a memory
2388   that is not naturally aligned.  Emit instructions to load it.  */
2389
2390void
2391alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2392{
  /* Honor misaligned loads; this is what we promised to handle.  */
2394  if (MEM_P (operands[1]))
2395    {
2396      rtx tmp;
2397
2398      if (register_operand (operands[0], mode))
2399	tmp = operands[0];
2400      else
2401	tmp = gen_reg_rtx (mode);
2402
2403      alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2404      if (tmp != operands[0])
2405	emit_move_insn (operands[0], tmp);
2406    }
2407  else if (MEM_P (operands[0]))
2408    {
2409      if (!reg_or_0_operand (operands[1], mode))
2410	operands[1] = force_reg (mode, operands[1]);
2411      alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2412    }
2413  else
2414    gcc_unreachable ();
2415}
2416
2417/* Generate an unsigned DImode to FP conversion.  This is the same code
2418   optabs would emit if we didn't have TFmode patterns.
2419
2420   For SFmode, this is the only construction I've found that can pass
2421   gcc.c-torture/execute/ieee/rbug.c.  No scenario that uses DFmode
2422   intermediates will work, because you'll get intermediate rounding
2423   that ruins the end result.  Some of this could be fixed by turning
2424   on round-to-positive-infinity, but that requires diddling the fpsr,
2425   which kills performance.  I tried turning this around and converting
2426   to a negative number, so that I could turn on /m, but either I did
   it wrong or there's something else, because I wound up with the exact
2428   same single-bit error.  There is a branch-less form of this same code:
2429
2430	srl     $16,1,$1
2431	and     $16,1,$2
2432	cmplt   $16,0,$3
2433	or      $1,$2,$2
2434	cmovge  $16,$16,$2
2435	itoft	$3,$f10
2436	itoft	$2,$f11
2437	cvtqs   $f11,$f11
2438	adds    $f11,$f11,$f0
2439	fcmoveq $f10,$f11,$f0
2440
2441   I'm not using it because it's the same number of instructions as
2442   this branch-full form, and it has more serialized long latency
2443   instructions on the critical path.
2444
2445   For DFmode, we can avoid rounding errors by breaking up the word
2446   into two pieces, converting them separately, and adding them back:
2447
2448   LC0: .long 0,0x5f800000
2449
2450	itoft	$16,$f11
2451	lda	$2,LC0
2452	cmplt	$16,0,$1
2453	cpyse	$f11,$f31,$f10
2454	cpyse	$f31,$f11,$f11
2455	s4addq	$1,$2,$1
2456	lds	$f12,0($1)
2457	cvtqt	$f10,$f10
2458	cvtqt	$f11,$f11
2459	addt	$f12,$f10,$f0
2460	addt	$f0,$f11,$f0
2461
2462   This doesn't seem to be a clear-cut win over the optabs form.
2463   It probably all depends on the distribution of numbers being
2464   converted -- in the optabs form, all but high-bit-set has a
2465   much lower minimum execution time.  */
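
/* For reference, the negative path below is roughly this in C, shown
   for the DFmode case (an illustrative sketch, not generated code):

	uint64_t u = in;
	double d = (double) (int64_t) ((u >> 1) | (u & 1));
	result = d + d;

   The discarded low bit is OR'd back in as a sticky bit so that the
   halve/convert/double sequence still rounds to the correct result.  */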
2466
2467void
2468alpha_emit_floatuns (rtx operands[2])
2469{
2470  rtx neglab, donelab, i0, i1, f0, in, out;
2471  machine_mode mode;
2472
2473  out = operands[0];
2474  in = force_reg (DImode, operands[1]);
2475  mode = GET_MODE (out);
2476  neglab = gen_label_rtx ();
2477  donelab = gen_label_rtx ();
2478  i0 = gen_reg_rtx (DImode);
2479  i1 = gen_reg_rtx (DImode);
2480  f0 = gen_reg_rtx (mode);
2481
2482  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2483
2484  emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2485  emit_jump_insn (gen_jump (donelab));
2486  emit_barrier ();
2487
2488  emit_label (neglab);
2489
2490  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2491  emit_insn (gen_anddi3 (i1, in, const1_rtx));
2492  emit_insn (gen_iordi3 (i0, i0, i1));
2493  emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2494  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2495
2496  emit_label (donelab);
2497}
2498
2499/* Generate the comparison for a conditional branch.  */
2500
2501void
2502alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2503{
2504  enum rtx_code cmp_code, branch_code;
2505  machine_mode branch_mode = VOIDmode;
2506  enum rtx_code code = GET_CODE (operands[0]);
2507  rtx op0 = operands[1], op1 = operands[2];
2508  rtx tem;
2509
2510  if (cmp_mode == TFmode)
2511    {
2512      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2513      op1 = const0_rtx;
2514      cmp_mode = DImode;
2515    }
2516
2517  /* The general case: fold the comparison code to the types of compares
2518     that we have, choosing the branch as necessary.  */
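
  /* For example (illustrative): an integer "a > b" branch becomes
     "t = (a <= b); branch if t == 0", since cmple exists but cmpgt
     does not, while a floating-point "a > b" becomes
     "t = (b < a); branch if t != 0" via the operand swap below.  */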
2519  switch (code)
2520    {
2521    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2522    case UNORDERED:
2523      /* We have these compares.  */
2524      cmp_code = code, branch_code = NE;
2525      break;
2526
2527    case NE:
2528    case ORDERED:
2529      /* These must be reversed.  */
2530      cmp_code = reverse_condition (code), branch_code = EQ;
2531      break;
2532
2533    case GE:  case GT: case GEU:  case GTU:
2534      /* For FP, we swap them, for INT, we reverse them.  */
2535      if (cmp_mode == DFmode)
2536	{
2537	  cmp_code = swap_condition (code);
2538	  branch_code = NE;
2539	  std::swap (op0, op1);
2540	}
2541      else
2542	{
2543	  cmp_code = reverse_condition (code);
2544	  branch_code = EQ;
2545	}
2546      break;
2547
2548    default:
2549      gcc_unreachable ();
2550    }
2551
2552  if (cmp_mode == DFmode)
2553    {
2554      if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2555	{
2556	  /* When we are not as concerned about non-finite values, and we
2557	     are comparing against zero, we can branch directly.  */
2558	  if (op1 == CONST0_RTX (DFmode))
2559	    cmp_code = UNKNOWN, branch_code = code;
2560	  else if (op0 == CONST0_RTX (DFmode))
2561	    {
2562	      /* Undo the swap we probably did just above.  */
2563	      std::swap (op0, op1);
2564	      branch_code = swap_condition (cmp_code);
2565	      cmp_code = UNKNOWN;
2566	    }
2567	}
2568      else
2569	{
2570	  /* ??? We mark the branch mode to be CCmode to prevent the
2571	     compare and branch from being combined, since the compare
2572	     insn follows IEEE rules that the branch does not.  */
2573	  branch_mode = CCmode;
2574	}
2575    }
2576  else
2577    {
2578      /* The following optimizations are only for signed compares.  */
2579      if (code != LEU && code != LTU && code != GEU && code != GTU)
2580	{
2581	  /* Whee.  Compare and branch against 0 directly.  */
2582	  if (op1 == const0_rtx)
2583	    cmp_code = UNKNOWN, branch_code = code;
2584
	  /* If the constant doesn't fit into an immediate, but can
	     be generated by lda/ldah, we adjust the argument and
	     compare against zero, so we can use beq/bne directly.  */
2588	  /* ??? Don't do this when comparing against symbols, otherwise
2589	     we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2590	     be declared false out of hand (at least for non-weak).  */
2591	  else if (CONST_INT_P (op1)
2592		   && (code == EQ || code == NE)
2593		   && !(symbolic_operand (op0, VOIDmode)
2594			|| (REG_P (op0) && REG_POINTER (op0))))
2595	    {
2596	      rtx n_op1 = GEN_INT (-INTVAL (op1));
2597
2598	      if (! satisfies_constraint_I (op1)
2599		  && (satisfies_constraint_K (n_op1)
2600		      || satisfies_constraint_L (n_op1)))
2601		cmp_code = PLUS, branch_code = code, op1 = n_op1;
2602	    }
2603	}
2604
2605      if (!reg_or_0_operand (op0, DImode))
2606	op0 = force_reg (DImode, op0);
2607      if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2608	op1 = force_reg (DImode, op1);
2609    }
2610
2611  /* Emit an initial compare instruction, if necessary.  */
2612  tem = op0;
2613  if (cmp_code != UNKNOWN)
2614    {
2615      tem = gen_reg_rtx (cmp_mode);
2616      emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2617    }
2618
2619  /* Emit the branch instruction.  */
2620  tem = gen_rtx_SET (pc_rtx,
2621		     gen_rtx_IF_THEN_ELSE (VOIDmode,
2622					   gen_rtx_fmt_ee (branch_code,
2623							   branch_mode, tem,
2624							   CONST0_RTX (cmp_mode)),
2625					   gen_rtx_LABEL_REF (VOIDmode,
2626							      operands[3]),
2627					   pc_rtx));
2628  emit_jump_insn (tem);
2629}
2630
/* Certain simplifications can be done to make invalid setcc operations
   valid.  Return true on success, false if we can't do the comparison.  */
2633
2634bool
2635alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2636{
2637  enum rtx_code cmp_code;
2638  enum rtx_code code = GET_CODE (operands[1]);
2639  rtx op0 = operands[2], op1 = operands[3];
2640  rtx tmp;
2641
2642  if (cmp_mode == TFmode)
2643    {
2644      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2645      op1 = const0_rtx;
2646      cmp_mode = DImode;
2647    }
2648
2649  if (cmp_mode == DFmode && !TARGET_FIX)
2650    return 0;
2651
2652  /* The general case: fold the comparison code to the types of compares
2653     that we have, choosing the branch as necessary.  */
2654
2655  cmp_code = UNKNOWN;
2656  switch (code)
2657    {
2658    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2659    case UNORDERED:
2660      /* We have these compares.  */
2661      if (cmp_mode == DFmode)
2662	cmp_code = code, code = NE;
2663      break;
2664
2665    case NE:
2666      if (cmp_mode == DImode && op1 == const0_rtx)
2667	break;
2668      /* FALLTHRU */
2669
2670    case ORDERED:
2671      cmp_code = reverse_condition (code);
2672      code = EQ;
2673      break;
2674
2675    case GE:  case GT: case GEU:  case GTU:
2676      /* These normally need swapping, but for integer zero we have
2677	 special patterns that recognize swapped operands.  */
2678      if (cmp_mode == DImode && op1 == const0_rtx)
2679	break;
2680      code = swap_condition (code);
2681      if (cmp_mode == DFmode)
2682	cmp_code = code, code = NE;
2683      std::swap (op0, op1);
2684      break;
2685
2686    default:
2687      gcc_unreachable ();
2688    }
2689
2690  if (cmp_mode == DImode)
2691    {
2692      if (!register_operand (op0, DImode))
2693	op0 = force_reg (DImode, op0);
2694      if (!reg_or_8bit_operand (op1, DImode))
2695	op1 = force_reg (DImode, op1);
2696    }
2697
2698  /* Emit an initial compare instruction, if necessary.  */
2699  if (cmp_code != UNKNOWN)
2700    {
2701      tmp = gen_reg_rtx (cmp_mode);
2702      emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2703						   op0, op1)));
2704
2705      op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2706      op1 = const0_rtx;
2707    }
2708
2709  /* Emit the setcc instruction.  */
2710  emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2711						       op0, op1)));
2712  return true;
2713}
2714
2715
2716/* Rewrite a comparison against zero CMP of the form
2717   (CODE (cc0) (const_int 0)) so it can be written validly in
2718   a conditional move (if_then_else CMP ...).
2719   If both of the operands that set cc0 are nonzero we must emit
2720   an insn to perform the compare (it can't be done within
2721   the conditional move).  */
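
/* For example (illustrative): for "x = (a >= 0) ? b : c" in DImode the
   comparison (ge a 0) can be returned as-is and used directly in the
   if_then_else, yielding a single cmovge; a DFmode compare of two
   general values instead needs a compare insn emitted first, and the
   cmov then tests that result against zero.  */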
2722
2723rtx
2724alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2725{
2726  enum rtx_code code = GET_CODE (cmp);
2727  enum rtx_code cmov_code = NE;
2728  rtx op0 = XEXP (cmp, 0);
2729  rtx op1 = XEXP (cmp, 1);
2730  machine_mode cmp_mode
2731    = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2732  machine_mode cmov_mode = VOIDmode;
2733  int local_fast_math = flag_unsafe_math_optimizations;
2734  rtx tem;
2735
2736  if (cmp_mode == TFmode)
2737    {
2738      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2739      op1 = const0_rtx;
2740      cmp_mode = DImode;
2741    }
2742
2743  gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2744
2745  if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2746    {
2747      enum rtx_code cmp_code;
2748
2749      if (! TARGET_FIX)
2750	return 0;
2751
2752      /* If we have fp<->int register move instructions, do a cmov by
2753	 performing the comparison in fp registers, and move the
2754	 zero/nonzero value to integer registers, where we can then
2755	 use a normal cmov, or vice-versa.  */
2756
2757      switch (code)
2758	{
2759	case EQ: case LE: case LT: case LEU: case LTU:
2760	case UNORDERED:
2761	  /* We have these compares.  */
2762	  cmp_code = code, code = NE;
2763	  break;
2764
2765	case NE:
2766	case ORDERED:
2767	  /* These must be reversed.  */
2768	  cmp_code = reverse_condition (code), code = EQ;
2769	  break;
2770
2771	case GE: case GT: case GEU: case GTU:
2772	  /* These normally need swapping, but for integer zero we have
2773	     special patterns that recognize swapped operands.  */
2774	  if (cmp_mode == DImode && op1 == const0_rtx)
2775	    cmp_code = code, code = NE;
2776	  else
2777	    {
2778	      cmp_code = swap_condition (code);
2779	      code = NE;
2780	      std::swap (op0, op1);
2781	    }
2782	  break;
2783
2784	default:
2785	  gcc_unreachable ();
2786	}
2787
2788      if (cmp_mode == DImode)
2789	{
2790	  if (!reg_or_0_operand (op0, DImode))
2791	    op0 = force_reg (DImode, op0);
2792	  if (!reg_or_8bit_operand (op1, DImode))
2793	    op1 = force_reg (DImode, op1);
2794	}
2795
2796      tem = gen_reg_rtx (cmp_mode);
2797      emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2798						   op0, op1)));
2799
2800      cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode;
2801      op0 = gen_lowpart (cmp_mode, tem);
2802      op1 = CONST0_RTX (cmp_mode);
2803      cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2804      local_fast_math = 1;
2805    }
2806
2807  if (cmp_mode == DImode)
2808    {
2809      if (!reg_or_0_operand (op0, DImode))
2810	op0 = force_reg (DImode, op0);
2811      if (!reg_or_8bit_operand (op1, DImode))
2812	op1 = force_reg (DImode, op1);
2813    }
2814
2815  /* We may be able to use a conditional move directly.
2816     This avoids emitting spurious compares.  */
2817  if (signed_comparison_operator (cmp, VOIDmode)
2818      && (cmp_mode == DImode || local_fast_math)
2819      && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2820    return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2821
2822  /* We can't put the comparison inside the conditional move;
2823     emit a compare instruction and put that inside the
2824     conditional move.  Make sure we emit only comparisons we have;
2825     swap or reverse as necessary.  */
2826
2827  if (!can_create_pseudo_p ())
2828    return NULL_RTX;
2829
2830  switch (code)
2831    {
2832    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2833    case UNORDERED:
2834      /* We have these compares: */
2835      break;
2836
2837    case NE:
2838    case ORDERED:
2839      /* These must be reversed.  */
2840      code = reverse_condition (code);
2841      cmov_code = EQ;
2842      break;
2843
2844    case GE:  case GT:  case GEU:  case GTU:
2845      /* These normally need swapping, but for integer zero we have
2846	 special patterns that recognize swapped operands.  */
2847      if (cmp_mode == DImode && op1 == const0_rtx)
2848	break;
2849      code = swap_condition (code);
2850      std::swap (op0, op1);
2851      break;
2852
2853    default:
2854      gcc_unreachable ();
2855    }
2856
2857  if (cmp_mode == DImode)
2858    {
2859      if (!reg_or_0_operand (op0, DImode))
2860	op0 = force_reg (DImode, op0);
2861      if (!reg_or_8bit_operand (op1, DImode))
2862	op1 = force_reg (DImode, op1);
2863    }
2864
2865  /* ??? We mark the branch mode to be CCmode to prevent the compare
2866     and cmov from being combined, since the compare insn follows IEEE
2867     rules that the cmov does not.  */
2868  if (cmp_mode == DFmode && !local_fast_math)
2869    cmov_mode = CCmode;
2870
2871  tem = gen_reg_rtx (cmp_mode);
2872  emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2873  return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2874}
2875
2876/* Simplify a conditional move of two constants into a setcc with
2877   arithmetic.  This is done with a splitter since combine would
2878   just undo the work if done during code generation.  It also catches
2879   cases we wouldn't have before cse.  */
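
/* For example (illustrative): "x = (a == b) ? 8 : 0" can become
   "t = (a == b); x = t << 3", and "x = (a == b) ? 5 : 1" can use the
   s4addq form "x = (t << 2) + 1"; both avoid a cmov entirely.  */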
2880
2881int
2882alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2883			      rtx t_rtx, rtx f_rtx)
2884{
2885  HOST_WIDE_INT t, f, diff;
2886  machine_mode mode;
2887  rtx target, subtarget, tmp;
2888
2889  mode = GET_MODE (dest);
2890  t = INTVAL (t_rtx);
2891  f = INTVAL (f_rtx);
2892  diff = t - f;
2893
2894  if (((code == NE || code == EQ) && diff < 0)
2895      || (code == GE || code == GT))
2896    {
2897      code = reverse_condition (code);
2898      std::swap (t, f);
2899      diff = -diff;
2900    }
2901
2902  subtarget = target = dest;
2903  if (mode != DImode)
2904    {
2905      target = gen_lowpart (DImode, dest);
2906      if (can_create_pseudo_p ())
2907        subtarget = gen_reg_rtx (DImode);
2908      else
2909	subtarget = target;
2910    }
2911  /* Below, we must be careful to use copy_rtx on target and subtarget
2912     in intermediate insns, as they may be a subreg rtx, which may not
2913     be shared.  */
2914
2915  if (f == 0 && exact_log2 (diff) > 0
2916      /* On EV6, we've got enough shifters to make non-arithmetic shifts
2917	 viable over a longer latency cmove.  On EV5, the E0 slot is a
2918	 scarce resource, and on EV4 shift has the same latency as a cmove.  */
2919      && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2920    {
2921      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2922      emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2923
2924      tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2925			    GEN_INT (exact_log2 (t)));
2926      emit_insn (gen_rtx_SET (target, tmp));
2927    }
2928  else if (f == 0 && t == -1)
2929    {
2930      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2931      emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2932
2933      emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2934    }
2935  else if (diff == 1 || diff == 4 || diff == 8)
2936    {
2937      rtx add_op;
2938
2939      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2940      emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2941
2942      if (diff == 1)
2943	emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2944      else
2945	{
2946	  add_op = GEN_INT (f);
2947	  if (sext_add_operand (add_op, mode))
2948	    {
2949	      tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2950				    GEN_INT (exact_log2 (diff)));
2951	      tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2952	      emit_insn (gen_rtx_SET (target, tmp));
2953	    }
2954	  else
2955	    return 0;
2956	}
2957    }
2958  else
2959    return 0;
2960
2961  return 1;
2962}
2963
2964/* Look up the function X_floating library function name for the
2965   given operation.  */
2966
2967struct GTY(()) xfloating_op
2968{
2969  const enum rtx_code code;
2970  const char *const GTY((skip)) osf_func;
2971  const char *const GTY((skip)) vms_func;
2972  rtx libcall;
2973};
2974
2975static GTY(()) struct xfloating_op xfloating_ops[] =
2976{
2977  { PLUS,		"_OtsAddX", "OTS$ADD_X", 0 },
2978  { MINUS,		"_OtsSubX", "OTS$SUB_X", 0 },
2979  { MULT,		"_OtsMulX", "OTS$MUL_X", 0 },
2980  { DIV,		"_OtsDivX", "OTS$DIV_X", 0 },
2981  { EQ,			"_OtsEqlX", "OTS$EQL_X", 0 },
2982  { NE,			"_OtsNeqX", "OTS$NEQ_X", 0 },
2983  { LT,			"_OtsLssX", "OTS$LSS_X", 0 },
2984  { LE,			"_OtsLeqX", "OTS$LEQ_X", 0 },
2985  { GT,			"_OtsGtrX", "OTS$GTR_X", 0 },
2986  { GE,			"_OtsGeqX", "OTS$GEQ_X", 0 },
2987  { FIX,		"_OtsCvtXQ", "OTS$CVTXQ", 0 },
2988  { FLOAT,		"_OtsCvtQX", "OTS$CVTQX", 0 },
2989  { UNSIGNED_FLOAT,	"_OtsCvtQUX", "OTS$CVTQUX", 0 },
2990  { FLOAT_EXTEND,	"_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2991  { FLOAT_TRUNCATE,	"_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2992};
2993
2994static GTY(()) struct xfloating_op vax_cvt_ops[] =
2995{
2996  { FLOAT_EXTEND,	"_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2997  { FLOAT_TRUNCATE,	"_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2998};
2999
3000static rtx
3001alpha_lookup_xfloating_lib_func (enum rtx_code code)
3002{
3003  struct xfloating_op *ops = xfloating_ops;
3004  long n = ARRAY_SIZE (xfloating_ops);
3005  long i;
3006
3007  gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3008
3009  /* How irritating.  Nothing to key off for the main table.  */
3010  if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3011    {
3012      ops = vax_cvt_ops;
3013      n = ARRAY_SIZE (vax_cvt_ops);
3014    }
3015
3016  for (i = 0; i < n; ++i, ++ops)
3017    if (ops->code == code)
3018      {
3019	rtx func = ops->libcall;
3020	if (!func)
3021	  {
3022	    func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3023				     ? ops->vms_func : ops->osf_func);
3024	    ops->libcall = func;
3025	  }
3026        return func;
3027      }
3028
3029  gcc_unreachable ();
3030}
3031
3032/* Most X_floating operations take the rounding mode as an argument.
3033   Compute that here.  */
3034
3035static int
3036alpha_compute_xfloating_mode_arg (enum rtx_code code,
3037				  enum alpha_fp_rounding_mode round)
3038{
3039  int mode;
3040
3041  switch (round)
3042    {
3043    case ALPHA_FPRM_NORM:
3044      mode = 2;
3045      break;
3046    case ALPHA_FPRM_MINF:
3047      mode = 1;
3048      break;
3049    case ALPHA_FPRM_CHOP:
3050      mode = 0;
3051      break;
3052    case ALPHA_FPRM_DYN:
3053      mode = 4;
3054      break;
3055    default:
3056      gcc_unreachable ();
3057
3058    /* XXX For reference, round to +inf is mode = 3.  */
3059    }
3060
3061  if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3062    mode |= 0x10000;
3063
3064  return mode;
3065}
3066
3067/* Emit an X_floating library function call.
3068
3069   Note that these functions do not follow normal calling conventions:
3070   TFmode arguments are passed in two integer registers (as opposed to
3071   indirect); TFmode return values appear in R16+R17.
3072
3073   FUNC is the function to call.
3074   TARGET is where the output belongs.
3075   OPERANDS are the inputs.
3076   NOPERANDS is the count of inputs.
3077   EQUIV is the expression equivalent for the function.
3078*/
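
/* For example (illustrative): a TFmode add (e.g. _OtsAddX) gets its two
   TFmode operands in $16/$17 and $18/$19, the rounding-mode argument in
   $20, and the TFmode result back in $16/$17.  */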
3079
3080static void
3081alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3082			      int noperands, rtx equiv)
3083{
3084  rtx usage = NULL_RTX, reg;
3085  int regno = 16, i;
3086
3087  start_sequence ();
3088
3089  for (i = 0; i < noperands; ++i)
3090    {
3091      switch (GET_MODE (operands[i]))
3092	{
3093	case E_TFmode:
3094	  reg = gen_rtx_REG (TFmode, regno);
3095	  regno += 2;
3096	  break;
3097
3098	case E_DFmode:
3099	  reg = gen_rtx_REG (DFmode, regno + 32);
3100	  regno += 1;
3101	  break;
3102
3103	case E_VOIDmode:
3104	  gcc_assert (CONST_INT_P (operands[i]));
3105	  /* FALLTHRU */
3106	case E_DImode:
3107	  reg = gen_rtx_REG (DImode, regno);
3108	  regno += 1;
3109	  break;
3110
3111	default:
3112	  gcc_unreachable ();
3113	}
3114
3115      emit_move_insn (reg, operands[i]);
3116      use_reg (&usage, reg);
3117    }
3118
3119  switch (GET_MODE (target))
3120    {
3121    case E_TFmode:
3122      reg = gen_rtx_REG (TFmode, 16);
3123      break;
3124    case E_DFmode:
3125      reg = gen_rtx_REG (DFmode, 32);
3126      break;
3127    case E_DImode:
3128      reg = gen_rtx_REG (DImode, 0);
3129      break;
3130    default:
3131      gcc_unreachable ();
3132    }
3133
3134  rtx mem = gen_rtx_MEM (QImode, func);
3135  rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3136						  const0_rtx, const0_rtx));
3137  CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3138  RTL_CONST_CALL_P (tmp) = 1;
3139
3140  tmp = get_insns ();
3141  end_sequence ();
3142
3143  emit_libcall_block (tmp, target, reg, equiv);
3144}
3145
3146/* Emit an X_floating library function call for arithmetic (+,-,*,/).  */
3147
3148void
3149alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3150{
3151  rtx func;
3152  int mode;
3153  rtx out_operands[3];
3154
3155  func = alpha_lookup_xfloating_lib_func (code);
3156  mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3157
3158  out_operands[0] = operands[1];
3159  out_operands[1] = operands[2];
3160  out_operands[2] = GEN_INT (mode);
3161  alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3162				gen_rtx_fmt_ee (code, TFmode, operands[1],
3163						operands[2]));
3164}
3165
3166/* Emit an X_floating library function call for a comparison.  */
3167
3168static rtx
3169alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3170{
3171  enum rtx_code cmp_code, res_code;
3172  rtx func, out, operands[2], note;
3173
3174  /* X_floating library comparison functions return
3175	   -1  unordered
3176	    0  false
3177	    1  true
3178     Convert the compare against the raw return value.  */
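
  /* For example (illustrative): LE is handled by calling the LE library
     routine and testing its result with GT against zero (1 means true),
     while UNORDERED calls the EQ routine and tests the result with LT,
     since only -1, the unordered return, is negative.  */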
3179
3180  cmp_code = *pcode;
3181  switch (cmp_code)
3182    {
3183    case UNORDERED:
3184      cmp_code = EQ;
3185      res_code = LT;
3186      break;
3187    case ORDERED:
3188      cmp_code = EQ;
3189      res_code = GE;
3190      break;
3191    case NE:
3192      res_code = NE;
3193      break;
3194    case EQ:
3195    case LT:
3196    case GT:
3197    case LE:
3198    case GE:
3199      res_code = GT;
3200      break;
3201    default:
3202      gcc_unreachable ();
3203    }
3204  *pcode = res_code;
3205
3206  func = alpha_lookup_xfloating_lib_func (cmp_code);
3207
3208  operands[0] = op0;
3209  operands[1] = op1;
3210  out = gen_reg_rtx (DImode);
3211
3212  /* What's actually returned is -1,0,1, not a proper boolean value.  */
3213  note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3214  note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3215  alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3216
3217  return out;
3218}
3219
3220/* Emit an X_floating library function call for a conversion.  */
3221
3222void
3223alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3224{
3225  int noperands = 1, mode;
3226  rtx out_operands[2];
3227  rtx func;
3228  enum rtx_code code = orig_code;
3229
3230  if (code == UNSIGNED_FIX)
3231    code = FIX;
3232
3233  func = alpha_lookup_xfloating_lib_func (code);
3234
3235  out_operands[0] = operands[1];
3236
3237  switch (code)
3238    {
3239    case FIX:
3240      mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3241      out_operands[1] = GEN_INT (mode);
3242      noperands = 2;
3243      break;
3244    case FLOAT_TRUNCATE:
3245      mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3246      out_operands[1] = GEN_INT (mode);
3247      noperands = 2;
3248      break;
3249    default:
3250      break;
3251    }
3252
3253  alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3254				gen_rtx_fmt_e (orig_code,
3255					       GET_MODE (operands[0]),
3256					       operands[1]));
3257}
3258
3259/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3260   DImode moves from OP[2,3] to OP[0,1].  If FIXUP_OVERLAP is true,
3261   guarantee that the sequence
3262     set (OP[0] OP[2])
3263     set (OP[1] OP[3])
3264   is valid.  Naturally, output operand ordering is little-endian.
3265   This is used by *movtf_internal and *movti_internal.  */
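
/* For example (illustrative): splitting a TImode register copy from the
   pair $1/$2 into the pair $2/$3 would clobber $2 before it is read;
   with FIXUP_OVERLAP the two DImode moves are swapped so that $3 is
   written from $2 first.  */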
3266
3267void
3268alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3269			bool fixup_overlap)
3270{
3271  switch (GET_CODE (operands[1]))
3272    {
3273    case REG:
3274      operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3275      operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3276      break;
3277
3278    case MEM:
3279      operands[3] = adjust_address (operands[1], DImode, 8);
3280      operands[2] = adjust_address (operands[1], DImode, 0);
3281      break;
3282
3283    CASE_CONST_SCALAR_INT:
3284    case CONST_DOUBLE:
3285      gcc_assert (operands[1] == CONST0_RTX (mode));
3286      operands[2] = operands[3] = const0_rtx;
3287      break;
3288
3289    default:
3290      gcc_unreachable ();
3291    }
3292
3293  switch (GET_CODE (operands[0]))
3294    {
3295    case REG:
3296      operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3297      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3298      break;
3299
3300    case MEM:
3301      operands[1] = adjust_address (operands[0], DImode, 8);
3302      operands[0] = adjust_address (operands[0], DImode, 0);
3303      break;
3304
3305    default:
3306      gcc_unreachable ();
3307    }
3308
3309  if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3310    {
3311      std::swap (operands[0], operands[1]);
3312      std::swap (operands[2], operands[3]);
3313    }
3314}
3315
3316/* Implement negtf2 or abstf2.  Op0 is destination, op1 is source,
3317   op2 is a register containing the sign bit, operation is the
3318   logical operation to be performed.  */
3319
3320void
3321alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3322{
3323  rtx high_bit = operands[2];
3324  rtx scratch;
3325  int move;
3326
3327  alpha_split_tmode_pair (operands, TFmode, false);
3328
3329  /* Detect three flavors of operand overlap.  */
3330  move = 1;
3331  if (rtx_equal_p (operands[0], operands[2]))
3332    move = 0;
3333  else if (rtx_equal_p (operands[1], operands[2]))
3334    {
3335      if (rtx_equal_p (operands[0], high_bit))
3336	move = 2;
3337      else
3338	move = -1;
3339    }
3340
3341  if (move < 0)
3342    emit_move_insn (operands[0], operands[2]);
3343
3344  /* ??? If the destination overlaps both source tf and high_bit, then
3345     assume source tf is dead in its entirety and use the other half
3346     for a scratch register.  Otherwise "scratch" is just the proper
3347     destination register.  */
3348  scratch = operands[move < 2 ? 1 : 3];
3349
3350  emit_insn ((*operation) (scratch, high_bit, operands[3]));
3351
3352  if (move > 0)
3353    {
3354      emit_move_insn (operands[0], operands[2]);
3355      if (move > 1)
3356	emit_move_insn (operands[1], scratch);
3357    }
3358}
3359
3360/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3361   unaligned data:
3362
3363           unsigned:                       signed:
3364   word:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3365           ldq_u  r2,X+1(r11)              ldq_u  r2,X+1(r11)
3366           lda    r3,X(r11)                lda    r3,X+2(r11)
3367           extwl  r1,r3,r1                 extql  r1,r3,r1
3368           extwh  r2,r3,r2                 extqh  r2,r3,r2
           or     r1,r2,r1                 or     r1,r2,r1
3370                                           sra    r1,48,r1
3371
3372   long:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3373           ldq_u  r2,X+3(r11)              ldq_u  r2,X+3(r11)
3374           lda    r3,X(r11)                lda    r3,X(r11)
3375           extll  r1,r3,r1                 extll  r1,r3,r1
3376           extlh  r2,r3,r2                 extlh  r2,r3,r2
           or     r1,r2,r1                 addl   r1,r2,r1
3378
3379   quad:   ldq_u  r1,X(r11)
3380           ldq_u  r2,X+7(r11)
3381           lda    r3,X(r11)
3382           extql  r1,r3,r1
3383           extqh  r2,r3,r2
           or     r1,r2,r1
3385*/
3386
3387void
3388alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3389			     HOST_WIDE_INT ofs, int sign)
3390{
3391  rtx meml, memh, addr, extl, exth, tmp, mema;
3392  machine_mode mode;
3393
3394  if (TARGET_BWX && size == 2)
3395    {
3396      meml = adjust_address (mem, QImode, ofs);
3397      memh = adjust_address (mem, QImode, ofs+1);
3398      extl = gen_reg_rtx (DImode);
3399      exth = gen_reg_rtx (DImode);
3400      emit_insn (gen_zero_extendqidi2 (extl, meml));
3401      emit_insn (gen_zero_extendqidi2 (exth, memh));
3402      exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3403				  NULL, 1, OPTAB_LIB_WIDEN);
3404      addr = expand_simple_binop (DImode, IOR, extl, exth,
3405				  NULL, 1, OPTAB_LIB_WIDEN);
3406
3407      if (sign && GET_MODE (tgt) != HImode)
3408	{
3409	  addr = gen_lowpart (HImode, addr);
3410	  emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3411	}
3412      else
3413	{
3414	  if (GET_MODE (tgt) != DImode)
3415	    addr = gen_lowpart (GET_MODE (tgt), addr);
3416	  emit_move_insn (tgt, addr);
3417	}
3418      return;
3419    }
3420
3421  meml = gen_reg_rtx (DImode);
3422  memh = gen_reg_rtx (DImode);
3423  addr = gen_reg_rtx (DImode);
3424  extl = gen_reg_rtx (DImode);
3425  exth = gen_reg_rtx (DImode);
3426
3427  mema = XEXP (mem, 0);
3428  if (GET_CODE (mema) == LO_SUM)
3429    mema = force_reg (Pmode, mema);
3430
3431  /* AND addresses cannot be in any alias set, since they may implicitly
3432     alias surrounding code.  Ideally we'd have some alias set that
3433     covered all types except those with alignment 8 or higher.  */
3434
3435  tmp = change_address (mem, DImode,
3436			gen_rtx_AND (DImode,
3437				     plus_constant (DImode, mema, ofs),
3438				     GEN_INT (-8)));
3439  set_mem_alias_set (tmp, 0);
3440  emit_move_insn (meml, tmp);
3441
3442  tmp = change_address (mem, DImode,
3443			gen_rtx_AND (DImode,
3444				     plus_constant (DImode, mema,
3445						    ofs + size - 1),
3446				     GEN_INT (-8)));
3447  set_mem_alias_set (tmp, 0);
3448  emit_move_insn (memh, tmp);
3449
3450  if (sign && size == 2)
3451    {
3452      emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3453
3454      emit_insn (gen_extql (extl, meml, addr));
3455      emit_insn (gen_extqh (exth, memh, addr));
3456
3457      /* We must use tgt here for the target.  Alpha-vms port fails if we use
3458	 addr for the target, because addr is marked as a pointer and combine
3459	 knows that pointers are always sign-extended 32-bit values.  */
3460      addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3461      addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3462			   addr, 1, OPTAB_WIDEN);
3463    }
3464  else
3465    {
3466      emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3467      emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3468      switch ((int) size)
3469	{
3470	case 2:
3471	  emit_insn (gen_extwh (exth, memh, addr));
3472	  mode = HImode;
3473	  break;
3474	case 4:
3475	  emit_insn (gen_extlh (exth, memh, addr));
3476	  mode = SImode;
3477	  break;
3478	case 8:
3479	  emit_insn (gen_extqh (exth, memh, addr));
3480	  mode = DImode;
3481	  break;
3482	default:
3483	  gcc_unreachable ();
3484	}
3485
3486      addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3487			   gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3488			   sign, OPTAB_WIDEN);
3489    }
3490
3491  if (addr != tgt)
3492    emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3493}
3494
3495/* Similarly, use ins and msk instructions to perform unaligned stores.  */
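
/* Illustrative outline for a 16-bit unaligned store when BWX is not
   available: load the two quadwords that cover the destination with
   ldq_u, position the new bytes with inswl/inswh, clear the bytes being
   replaced with mskwl/mskwh, OR the pieces together, and store both
   quadwords back, high half first.  */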
3496
3497void
3498alpha_expand_unaligned_store (rtx dst, rtx src,
3499			      HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3500{
3501  rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3502
3503  if (TARGET_BWX && size == 2)
3504    {
3505      if (src != const0_rtx)
3506	{
3507	  dstl = gen_lowpart (QImode, src);
3508	  dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3509				      NULL, 1, OPTAB_LIB_WIDEN);
3510	  dsth = gen_lowpart (QImode, dsth);
3511	}
3512      else
3513	dstl = dsth = const0_rtx;
3514
3515      meml = adjust_address (dst, QImode, ofs);
3516      memh = adjust_address (dst, QImode, ofs+1);
3517
3518      emit_move_insn (meml, dstl);
3519      emit_move_insn (memh, dsth);
3520      return;
3521    }
3522
3523  dstl = gen_reg_rtx (DImode);
3524  dsth = gen_reg_rtx (DImode);
3525  insl = gen_reg_rtx (DImode);
3526  insh = gen_reg_rtx (DImode);
3527
3528  dsta = XEXP (dst, 0);
3529  if (GET_CODE (dsta) == LO_SUM)
3530    dsta = force_reg (Pmode, dsta);
3531
3532  /* AND addresses cannot be in any alias set, since they may implicitly
3533     alias surrounding code.  Ideally we'd have some alias set that
3534     covered all types except those with alignment 8 or higher.  */
3535
3536  meml = change_address (dst, DImode,
3537			 gen_rtx_AND (DImode,
3538				      plus_constant (DImode, dsta, ofs),
3539				      GEN_INT (-8)));
3540  set_mem_alias_set (meml, 0);
3541
3542  memh = change_address (dst, DImode,
3543			 gen_rtx_AND (DImode,
3544				      plus_constant (DImode, dsta,
3545						     ofs + size - 1),
3546				      GEN_INT (-8)));
3547  set_mem_alias_set (memh, 0);
3548
3549  emit_move_insn (dsth, memh);
3550  emit_move_insn (dstl, meml);
3551
3552  addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3553
3554  if (src != CONST0_RTX (GET_MODE (src)))
3555    {
3556      emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3557			    GEN_INT (size*8), addr));
3558
3559      switch ((int) size)
3560	{
3561	case 2:
3562	  emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3563	  break;
3564	case 4:
3565	  emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3566	  break;
3567	case 8:
3568	  emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3569	  break;
3570	default:
3571	  gcc_unreachable ();
3572	}
3573    }
3574
3575  emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3576
3577  switch ((int) size)
3578    {
3579    case 2:
3580      emit_insn (gen_mskwl (dstl, dstl, addr));
3581      break;
3582    case 4:
3583      emit_insn (gen_mskll (dstl, dstl, addr));
3584      break;
3585    case 8:
3586      emit_insn (gen_mskql (dstl, dstl, addr));
3587      break;
3588    default:
3589      gcc_unreachable ();
3590    }
3591
3592  if (src != CONST0_RTX (GET_MODE (src)))
3593    {
3594      dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3595      dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3596    }
3597
3598  /* Must store high before low for degenerate case of aligned.  */
3599  emit_move_insn (memh, dsth);
3600  emit_move_insn (meml, dstl);
3601}
3602
/* The block move code tries to maximize speed by separating loads and
   stores at the expense of register pressure: we load all of the data
   before we store it back out.  Two secondary effects are worth
   mentioning: this speeds copying to/from aligned and unaligned
   buffers, and it makes the code significantly easier to write.  */
3608
3609#define MAX_MOVE_WORDS	8
3610
3611/* Load an integral number of consecutive unaligned quadwords.  */
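/* Schematically, for WORDS == 2 the emitted sequence is roughly
   (register numbers are illustrative; A holds SMEM & 7):

	ldq_u	r0,0(smem)
	ldq_u	r1,8(smem)
	ldq_u	r2,15(smem)	# last quadword touched by the copy
	extql	r0,smem,r0
	extqh	r1,smem,t0
	cmoveq	a,$31,t0	# force zero when A == 0
	extql	r1,smem,r1
	extqh	r2,smem,t1
	cmoveq	a,$31,t1
	bis	r0,t0,out0
	bis	r1,t1,out1

   The loads are all issued before any of the extracts, to separate the
   memory latency from the shifting.  */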
3612
3613static void
3614alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3615				   HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3616{
3617  rtx const im8 = GEN_INT (-8);
3618  rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3619  rtx sreg, areg, tmp, smema;
3620  HOST_WIDE_INT i;
3621
3622  smema = XEXP (smem, 0);
3623  if (GET_CODE (smema) == LO_SUM)
3624    smema = force_reg (Pmode, smema);
3625
3626  /* Generate all the tmp registers we need.  */
3627  for (i = 0; i < words; ++i)
3628    {
3629      data_regs[i] = out_regs[i];
3630      ext_tmps[i] = gen_reg_rtx (DImode);
3631    }
3632  data_regs[words] = gen_reg_rtx (DImode);
3633
3634  if (ofs != 0)
3635    smem = adjust_address (smem, GET_MODE (smem), ofs);
3636
3637  /* Load up all of the source data.  */
3638  for (i = 0; i < words; ++i)
3639    {
3640      tmp = change_address (smem, DImode,
3641			    gen_rtx_AND (DImode,
3642					 plus_constant (DImode, smema, 8*i),
3643					 im8));
3644      set_mem_alias_set (tmp, 0);
3645      emit_move_insn (data_regs[i], tmp);
3646    }
3647
3648  tmp = change_address (smem, DImode,
3649			gen_rtx_AND (DImode,
3650				     plus_constant (DImode, smema,
3651						    8*words - 1),
3652				     im8));
3653  set_mem_alias_set (tmp, 0);
3654  emit_move_insn (data_regs[words], tmp);
3655
3656  /* Extract the half-word fragments.  Unfortunately DEC decided to make
3657     extxh with offset zero a noop instead of zeroing the register, so
3658     we must take care of that edge condition ourselves with cmov.  */
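  /* Concretely: if SMEM happens to be 8-byte aligned, the shift count in
     AREG is zero and extqh does not produce the zero high fragment we
     need, so the IOR below would merge in bytes from the following
     quadword; the conditional move forces that fragment to zero in this
     case.  */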
3659
3660  sreg = copy_addr_to_reg (smema);
3661  areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3662		       1, OPTAB_WIDEN);
3663  for (i = 0; i < words; ++i)
3664    {
3665      emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3666      emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3667      emit_insn (gen_rtx_SET (ext_tmps[i],
3668			      gen_rtx_IF_THEN_ELSE (DImode,
3669						    gen_rtx_EQ (DImode, areg,
3670								const0_rtx),
3671						    const0_rtx, ext_tmps[i])));
3672    }
3673
3674  /* Merge the half-words into whole words.  */
3675  for (i = 0; i < words; ++i)
3676    {
3677      out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3678				  ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3679    }
3680}
3681
3682/* Store an integral number of consecutive unaligned quadwords.  DATA_REGS
3683   may be NULL to store zeros.  */
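/* Roughly: the first and last destination quadwords may be only partially
   covered, so they are loaded, the affected bytes are cleared with
   mskql/mskqh, the shifted data (insql/insqh) is IORed in, and the results
   are written back with stq_u; the interior quadwords are simply
   overwritten with stq_u.  */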
3684
3685static void
3686alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3687				    HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3688{
3689  rtx const im8 = GEN_INT (-8);
3690  rtx ins_tmps[MAX_MOVE_WORDS];
3691  rtx st_tmp_1, st_tmp_2, dreg;
3692  rtx st_addr_1, st_addr_2, dmema;
3693  HOST_WIDE_INT i;
3694
3695  dmema = XEXP (dmem, 0);
3696  if (GET_CODE (dmema) == LO_SUM)
3697    dmema = force_reg (Pmode, dmema);
3698
3699  /* Generate all the tmp registers we need.  */
3700  if (data_regs != NULL)
3701    for (i = 0; i < words; ++i)
      ins_tmps[i] = gen_reg_rtx (DImode);
  st_tmp_1 = gen_reg_rtx (DImode);
  st_tmp_2 = gen_reg_rtx (DImode);
3705
3706  if (ofs != 0)
3707    dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3708
3709  st_addr_2 = change_address (dmem, DImode,
3710			      gen_rtx_AND (DImode,
3711					   plus_constant (DImode, dmema,
3712							  words*8 - 1),
3713					   im8));
3714  set_mem_alias_set (st_addr_2, 0);
3715
3716  st_addr_1 = change_address (dmem, DImode,
3717			      gen_rtx_AND (DImode, dmema, im8));
3718  set_mem_alias_set (st_addr_1, 0);
3719
3720  /* Load up the destination end bits.  */
3721  emit_move_insn (st_tmp_2, st_addr_2);
3722  emit_move_insn (st_tmp_1, st_addr_1);
3723
3724  /* Shift the input data into place.  */
3725  dreg = copy_addr_to_reg (dmema);
3726  if (data_regs != NULL)
3727    {
3728      for (i = words-1; i >= 0; --i)
3729	{
3730	  emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3731	  emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3732	}
3733      for (i = words-1; i > 0; --i)
3734	{
3735	  ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3736					ins_tmps[i-1], ins_tmps[i-1], 1,
3737					OPTAB_WIDEN);
3738	}
3739    }
3740
3741  /* Split and merge the ends with the destination data.  */
3742  emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3743  emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3744
3745  if (data_regs != NULL)
3746    {
3747      st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3748			       st_tmp_2, 1, OPTAB_WIDEN);
3749      st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3750			       st_tmp_1, 1, OPTAB_WIDEN);
3751    }
3752
3753  /* Store it all.  */
3754  emit_move_insn (st_addr_2, st_tmp_2);
3755  for (i = words-1; i > 0; --i)
3756    {
3757      rtx tmp = change_address (dmem, DImode,
3758				gen_rtx_AND (DImode,
3759					     plus_constant (DImode,
3760							    dmema, i*8),
3761					     im8));
3762      set_mem_alias_set (tmp, 0);
3763      emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3764    }
3765  emit_move_insn (st_addr_1, st_tmp_1);
3766}
3767
3768
3769/* Expand string/block move operations.
3770
3771   operands[0] is the pointer to the destination.
3772   operands[1] is the pointer to the source.
3773   operands[2] is the number of bytes to move.
3774   operands[3] is the alignment.  */
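/* As an illustration (not an exhaustive description of the cases below),
   a 14-byte copy whose source and destination are both known to be 4-byte
   aligned is decomposed into three SImode loads plus one HImode load into
   fresh registers, followed by three SImode stores and one HImode store;
   less aligned residues fall back to the unaligned ldq_u/stq_u helpers
   above.  */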
3775
3776int
3777alpha_expand_block_move (rtx operands[])
3778{
3779  rtx bytes_rtx	= operands[2];
3780  rtx align_rtx = operands[3];
3781  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3782  HOST_WIDE_INT bytes = orig_bytes;
3783  HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3784  HOST_WIDE_INT dst_align = src_align;
3785  rtx orig_src = operands[1];
3786  rtx orig_dst = operands[0];
3787  rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3788  rtx tmp;
3789  unsigned int i, words, ofs, nregs = 0;
3790
3791  if (orig_bytes <= 0)
3792    return 1;
3793  else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3794    return 0;
3795
3796  /* Look for additional alignment information from recorded register info.  */
3797
3798  tmp = XEXP (orig_src, 0);
3799  if (REG_P (tmp))
3800    src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3801  else if (GET_CODE (tmp) == PLUS
3802	   && REG_P (XEXP (tmp, 0))
3803	   && CONST_INT_P (XEXP (tmp, 1)))
3804    {
3805      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3806      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3807
3808      if (a > src_align)
3809	{
3810          if (a >= 64 && c % 8 == 0)
3811	    src_align = 64;
3812          else if (a >= 32 && c % 4 == 0)
3813	    src_align = 32;
3814          else if (a >= 16 && c % 2 == 0)
3815	    src_align = 16;
3816	}
3817    }
3818
3819  tmp = XEXP (orig_dst, 0);
3820  if (REG_P (tmp))
3821    dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3822  else if (GET_CODE (tmp) == PLUS
3823	   && REG_P (XEXP (tmp, 0))
3824	   && CONST_INT_P (XEXP (tmp, 1)))
3825    {
3826      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3827      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3828
3829      if (a > dst_align)
3830	{
3831          if (a >= 64 && c % 8 == 0)
3832	    dst_align = 64;
3833          else if (a >= 32 && c % 4 == 0)
3834	    dst_align = 32;
3835          else if (a >= 16 && c % 2 == 0)
3836	    dst_align = 16;
3837	}
3838    }
3839
3840  ofs = 0;
3841  if (src_align >= 64 && bytes >= 8)
3842    {
3843      words = bytes / 8;
3844
3845      for (i = 0; i < words; ++i)
3846	data_regs[nregs + i] = gen_reg_rtx (DImode);
3847
3848      for (i = 0; i < words; ++i)
3849	emit_move_insn (data_regs[nregs + i],
3850			adjust_address (orig_src, DImode, ofs + i * 8));
3851
3852      nregs += words;
3853      bytes -= words * 8;
3854      ofs += words * 8;
3855    }
3856
3857  if (src_align >= 32 && bytes >= 4)
3858    {
3859      words = bytes / 4;
3860
3861      for (i = 0; i < words; ++i)
3862	data_regs[nregs + i] = gen_reg_rtx (SImode);
3863
3864      for (i = 0; i < words; ++i)
3865	emit_move_insn (data_regs[nregs + i],
3866			adjust_address (orig_src, SImode, ofs + i * 4));
3867
3868      nregs += words;
3869      bytes -= words * 4;
3870      ofs += words * 4;
3871    }
3872
3873  if (bytes >= 8)
3874    {
3875      words = bytes / 8;
3876
3877      for (i = 0; i < words+1; ++i)
3878	data_regs[nregs + i] = gen_reg_rtx (DImode);
3879
3880      alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3881					 words, ofs);
3882
3883      nregs += words;
3884      bytes -= words * 8;
3885      ofs += words * 8;
3886    }
3887
3888  if (! TARGET_BWX && bytes >= 4)
3889    {
3890      data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3891      alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3892      bytes -= 4;
3893      ofs += 4;
3894    }
3895
3896  if (bytes >= 2)
3897    {
3898      if (src_align >= 16)
3899	{
3900	  do {
3901	    data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3902	    emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3903	    bytes -= 2;
3904	    ofs += 2;
3905	  } while (bytes >= 2);
3906	}
3907      else if (! TARGET_BWX)
3908	{
3909	  data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3910	  alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3911	  bytes -= 2;
3912	  ofs += 2;
3913	}
3914    }
3915
3916  while (bytes > 0)
3917    {
3918      data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3919      emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3920      bytes -= 1;
3921      ofs += 1;
3922    }
3923
3924  gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3925
3926  /* Now save it back out again.  */
3927
3928  i = 0, ofs = 0;
3929
3930  /* Write out the data in whatever chunks reading the source allowed.  */
3931  if (dst_align >= 64)
3932    {
3933      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3934	{
3935	  emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3936			  data_regs[i]);
3937	  ofs += 8;
3938	  i++;
3939	}
3940    }
3941
3942  if (dst_align >= 32)
3943    {
3944      /* If the source has remaining DImode regs, write them out in
3945	 two pieces.  */
3946      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3947	{
3948	  tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3949			      NULL_RTX, 1, OPTAB_WIDEN);
3950
3951	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3952			  gen_lowpart (SImode, data_regs[i]));
3953	  emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3954			  gen_lowpart (SImode, tmp));
3955	  ofs += 8;
3956	  i++;
3957	}
3958
3959      while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3960	{
3961	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3962			  data_regs[i]);
3963	  ofs += 4;
3964	  i++;
3965	}
3966    }
3967
3968  if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3969    {
3970      /* Write out a remaining block of words using unaligned methods.  */
3971
3972      for (words = 1; i + words < nregs; words++)
3973	if (GET_MODE (data_regs[i + words]) != DImode)
3974	  break;
3975
3976      if (words == 1)
3977	alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3978      else
3979        alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3980					    words, ofs);
3981
3982      i += words;
3983      ofs += words * 8;
3984    }
3985
3986  /* Due to the above, this won't be aligned.  */
3987  /* ??? If we have more than one of these, consider constructing full
3988     words in registers and using alpha_expand_unaligned_store_words.  */
3989  while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3990    {
3991      alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3992      ofs += 4;
3993      i++;
3994    }
3995
3996  if (dst_align >= 16)
3997    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3998      {
3999	emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4000	i++;
4001	ofs += 2;
4002      }
4003  else
4004    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4005      {
4006	alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4007	i++;
4008	ofs += 2;
4009      }
4010
4011  /* The remainder must be byte copies.  */
4012  while (i < nregs)
4013    {
4014      gcc_assert (GET_MODE (data_regs[i]) == QImode);
4015      emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4016      i++;
4017      ofs += 1;
4018    }
4019
4020  return 1;
4021}
4022
4023int
4024alpha_expand_block_clear (rtx operands[])
4025{
4026  rtx bytes_rtx	= operands[1];
4027  rtx align_rtx = operands[3];
4028  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4029  HOST_WIDE_INT bytes = orig_bytes;
4030  HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4031  HOST_WIDE_INT alignofs = 0;
4032  rtx orig_dst = operands[0];
4033  rtx tmp;
4034  int i, words, ofs = 0;
4035
4036  if (orig_bytes <= 0)
4037    return 1;
4038  if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4039    return 0;
4040
4041  /* Look for stricter alignment.  */
4042  tmp = XEXP (orig_dst, 0);
4043  if (REG_P (tmp))
4044    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4045  else if (GET_CODE (tmp) == PLUS
4046	   && REG_P (XEXP (tmp, 0))
4047	   && CONST_INT_P (XEXP (tmp, 1)))
4048    {
4049      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4050      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4051
4052      if (a > align)
4053	{
4054          if (a >= 64)
4055	    align = a, alignofs = 8 - c % 8;
4056          else if (a >= 32)
4057	    align = a, alignofs = 4 - c % 4;
4058          else if (a >= 16)
4059	    align = a, alignofs = 2 - c % 2;
4060	}
4061    }
4062
4063  /* Handle an unaligned prefix first.  */
4064
4065  if (alignofs > 0)
4066    {
4067      /* Given that alignofs is bounded by align, the only time BWX could
4068	 generate three stores is for a 7 byte fill.  Prefer two individual
4069	 stores over a load/mask/store sequence.  */
4070      if ((!TARGET_BWX || alignofs == 7)
4071	       && align >= 32
4072	       && !(alignofs == 4 && bytes >= 4))
4073	{
4074	  machine_mode mode = (align >= 64 ? DImode : SImode);
4075	  int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4076	  rtx mem, tmp;
4077	  HOST_WIDE_INT mask;
4078
4079	  mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4080	  set_mem_alias_set (mem, 0);
4081
4082	  mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
4083	  if (bytes < alignofs)
4084	    {
4085	      mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4086	      ofs += bytes;
4087	      bytes = 0;
4088	    }
4089	  else
4090	    {
4091	      bytes -= alignofs;
4092	      ofs += alignofs;
4093	    }
4094	  alignofs = 0;
4095
4096	  tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4097			      NULL_RTX, 1, OPTAB_WIDEN);
4098
4099	  emit_move_insn (mem, tmp);
4100	}
4101
4102      if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4103	{
4104	  emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4105	  bytes -= 1;
4106	  ofs += 1;
4107	  alignofs -= 1;
4108	}
4109      if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4110	{
4111	  emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4112	  bytes -= 2;
4113	  ofs += 2;
4114	  alignofs -= 2;
4115	}
4116      if (alignofs == 4 && bytes >= 4)
4117	{
4118	  emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4119	  bytes -= 4;
4120	  ofs += 4;
4121	  alignofs = 0;
4122	}
4123
4124      /* If we've not used the extra lead alignment information by now,
4125	 we won't be able to.  Downgrade align to match what's left over.  */
4126      if (alignofs > 0)
4127	{
4128	  alignofs = alignofs & -alignofs;
4129	  align = MIN (align, alignofs * BITS_PER_UNIT);
4130	}
4131    }
4132
4133  /* Handle a block of contiguous long-words.  */
4134
4135  if (align >= 64 && bytes >= 8)
4136    {
4137      words = bytes / 8;
4138
4139      for (i = 0; i < words; ++i)
4140	emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4141			const0_rtx);
4142
4143      bytes -= words * 8;
4144      ofs += words * 8;
4145    }
4146
4147  /* If the block is large and appropriately aligned, emit a single
4148     store followed by a sequence of stq_u insns.  */
4149
4150  if (align >= 32 && bytes > 16)
4151    {
4152      rtx orig_dsta;
4153
4154      emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4155      bytes -= 4;
4156      ofs += 4;
4157
4158      orig_dsta = XEXP (orig_dst, 0);
4159      if (GET_CODE (orig_dsta) == LO_SUM)
4160	orig_dsta = force_reg (Pmode, orig_dsta);
4161
4162      words = bytes / 8;
4163      for (i = 0; i < words; ++i)
4164	{
4165	  rtx mem
4166	    = change_address (orig_dst, DImode,
4167			      gen_rtx_AND (DImode,
4168					   plus_constant (DImode, orig_dsta,
4169							  ofs + i*8),
4170					   GEN_INT (-8)));
4171	  set_mem_alias_set (mem, 0);
4172	  emit_move_insn (mem, const0_rtx);
4173	}
4174
4175      /* Depending on the alignment, the first stq_u may have overlapped
4176	 with the initial stl, which means that the last stq_u didn't
4177	 write as much as it would appear.  Leave those questionable bytes
4178	 unaccounted for.  */
4179      bytes -= words * 8 - 4;
4180      ofs += words * 8 - 4;
4181    }
4182
4183  /* Handle a smaller block of aligned words.  */
4184
4185  if ((align >= 64 && bytes == 4)
4186      || (align == 32 && bytes >= 4))
4187    {
4188      words = bytes / 4;
4189
4190      for (i = 0; i < words; ++i)
4191	emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4192			const0_rtx);
4193
4194      bytes -= words * 4;
4195      ofs += words * 4;
4196    }
4197
  /* An unaligned block uses stq_u stores to clear as many whole quadwords
     as possible.  */
4199
4200  if (bytes >= 8)
4201    {
4202      words = bytes / 8;
4203
4204      alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4205
4206      bytes -= words * 8;
4207      ofs += words * 8;
4208    }
4209
4210  /* Next clean up any trailing pieces.  */
4211
4212  /* Count the number of bits in BYTES for which aligned stores could
4213     be emitted.  */
4214  words = 0;
  for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align; i <<= 1)
4216    if (bytes & i)
4217      words += 1;
4218
4219  /* If we have appropriate alignment (and it wouldn't take too many
4220     instructions otherwise), mask out the bytes we need.  */
4221  if (TARGET_BWX ? words > 2 : bytes > 0)
4222    {
4223      if (align >= 64)
4224	{
4225	  rtx mem, tmp;
4226	  HOST_WIDE_INT mask;
4227
4228	  mem = adjust_address (orig_dst, DImode, ofs);
4229	  set_mem_alias_set (mem, 0);
4230
4231	  mask = HOST_WIDE_INT_M1U << (bytes * 8);
4232
4233	  tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4234			      NULL_RTX, 1, OPTAB_WIDEN);
4235
4236	  emit_move_insn (mem, tmp);
4237	  return 1;
4238	}
4239      else if (align >= 32 && bytes < 4)
4240	{
4241	  rtx mem, tmp;
4242	  HOST_WIDE_INT mask;
4243
4244	  mem = adjust_address (orig_dst, SImode, ofs);
4245	  set_mem_alias_set (mem, 0);
4246
4247	  mask = HOST_WIDE_INT_M1U << (bytes * 8);
4248
4249	  tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4250			      NULL_RTX, 1, OPTAB_WIDEN);
4251
4252	  emit_move_insn (mem, tmp);
4253	  return 1;
4254	}
4255    }
4256
4257  if (!TARGET_BWX && bytes >= 4)
4258    {
4259      alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4260      bytes -= 4;
4261      ofs += 4;
4262    }
4263
4264  if (bytes >= 2)
4265    {
4266      if (align >= 16)
4267	{
4268	  do {
4269	    emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4270			    const0_rtx);
4271	    bytes -= 2;
4272	    ofs += 2;
4273	  } while (bytes >= 2);
4274	}
4275      else if (! TARGET_BWX)
4276	{
4277	  alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4278	  bytes -= 2;
4279	  ofs += 2;
4280	}
4281    }
4282
4283  while (bytes > 0)
4284    {
4285      emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4286      bytes -= 1;
4287      ofs += 1;
4288    }
4289
4290  return 1;
4291}
4292
4293/* Returns a mask so that zap(x, value) == x & mask.  */
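/* For example, VALUE == 0x05 (bits 0 and 2 set) requests that bytes 0
   and 2 be zapped, so the returned mask is 0xffffffffff00ff00.  */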
4294
4295rtx
4296alpha_expand_zap_mask (HOST_WIDE_INT value)
4297{
4298  rtx result;
4299  int i;
4300  HOST_WIDE_INT mask = 0;
4301
4302  for (i = 7; i >= 0; --i)
4303    {
4304      mask <<= 8;
4305      if (!((value >> i) & 1))
4306	mask |= 0xff;
4307    }
4308
4309  result = gen_int_mode (mask, DImode);
4310  return result;
4311}
4312
4313void
4314alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4315				   machine_mode mode,
4316				   rtx op0, rtx op1, rtx op2)
4317{
4318  op0 = gen_lowpart (mode, op0);
4319
4320  if (op1 == const0_rtx)
4321    op1 = CONST0_RTX (mode);
4322  else
4323    op1 = gen_lowpart (mode, op1);
4324
4325  if (op2 == const0_rtx)
4326    op2 = CONST0_RTX (mode);
4327  else
4328    op2 = gen_lowpart (mode, op2);
4329
4330  emit_insn ((*gen) (op0, op1, op2));
4331}
4332
4333/* A subroutine of the atomic operation splitters.  Jump to LABEL if
4334   COND is true.  Mark the jump as unlikely to be taken.  */
4335
4336static void
4337emit_unlikely_jump (rtx cond, rtx label)
4338{
4339  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4340  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4341  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
4342}
4343
4344/* Subroutines of the atomic operation splitters.  Emit barriers
4345   as needed for the memory MODEL.  */
4346
4347static void
4348alpha_pre_atomic_barrier (enum memmodel model)
4349{
4350  if (need_atomic_barrier_p (model, true))
4351    emit_insn (gen_memory_barrier ());
4352}
4353
4354static void
4355alpha_post_atomic_barrier (enum memmodel model)
4356{
4357  if (need_atomic_barrier_p (model, false))
4358    emit_insn (gen_memory_barrier ());
4359}
4360
4361/* A subroutine of the atomic operation splitters.  Emit an insxl
4362   instruction in MODE.  */
4363
4364static rtx
4365emit_insxl (machine_mode mode, rtx op1, rtx op2)
4366{
4367  rtx ret = gen_reg_rtx (DImode);
4368  rtx (*fn) (rtx, rtx, rtx);
4369
4370  switch (mode)
4371    {
4372    case E_QImode:
4373      fn = gen_insbl;
4374      break;
4375    case E_HImode:
4376      fn = gen_inswl;
4377      break;
4378    case E_SImode:
4379      fn = gen_insll;
4380      break;
4381    case E_DImode:
4382      fn = gen_insql;
4383      break;
4384    default:
4385      gcc_unreachable ();
4386    }
4387
4388  op1 = force_reg (mode, op1);
4389  emit_insn (fn (ret, op1, op2));
4390
4391  return ret;
4392}
4393
4394/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
4395   to perform.  MEM is the memory on which to operate.  VAL is the second
4396   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  SCRATCH is
4398   a scratch register.  */
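/* Roughly, for CODE == PLUS on a DImode MEM the emitted loop is
   (register numbers are illustrative):

	mb			# if the memory model requires it
    1:	ldq_l	t0,0(mem)	# BEFORE
	addq	t0,val,t1	# SCRATCH (and AFTER, if requested)
	stq_c	t1,0(mem)
	beq	t1,1b		# unlikely: retry if the reservation was lost
	mb			# if the memory model requires it  */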
4399
4400void
4401alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4402		       rtx after, rtx scratch, enum memmodel model)
4403{
4404  machine_mode mode = GET_MODE (mem);
4405  rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4406
4407  alpha_pre_atomic_barrier (model);
4408
4409  label = gen_label_rtx ();
4410  emit_label (label);
4411  label = gen_rtx_LABEL_REF (DImode, label);
4412
4413  if (before == NULL)
4414    before = scratch;
4415  emit_insn (gen_load_locked (mode, before, mem));
4416
4417  if (code == NOT)
4418    {
4419      x = gen_rtx_AND (mode, before, val);
4420      emit_insn (gen_rtx_SET (val, x));
4421
4422      x = gen_rtx_NOT (mode, val);
4423    }
4424  else
4425    x = gen_rtx_fmt_ee (code, mode, before, val);
4426  if (after)
4427    emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4428  emit_insn (gen_rtx_SET (scratch, x));
4429
4430  emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4431
4432  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4433  emit_unlikely_jump (x, label);
4434
4435  alpha_post_atomic_barrier (model);
4436}
4437
4438/* Expand a compare and swap operation.  */
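/* Roughly, for the strong DImode variant the emitted sequence is
   (register numbers are illustrative):

	mb			# if required by the success memory model
    1:	ldq_l	retval,0(mem)
	cmpeq	retval,oldval,cond
	beq	cond,2f		# unlikely: the comparison failed
	mov	newval,cond
	stq_c	cond,0(mem)
	beq	cond,1b		# unlikely: reservation lost, retry
    2:	mb			# if required

   The weak variant omits the backward branch, and a zero OLDVAL is
   handled with a single bne instead of the cmpeq/beq pair.  */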
4439
4440void
4441alpha_split_compare_and_swap (rtx operands[])
4442{
4443  rtx cond, retval, mem, oldval, newval;
4444  bool is_weak;
4445  enum memmodel mod_s, mod_f;
4446  machine_mode mode;
4447  rtx label1, label2, x;
4448
4449  cond = operands[0];
4450  retval = operands[1];
4451  mem = operands[2];
4452  oldval = operands[3];
4453  newval = operands[4];
4454  is_weak = (operands[5] != const0_rtx);
4455  mod_s = memmodel_from_int (INTVAL (operands[6]));
4456  mod_f = memmodel_from_int (INTVAL (operands[7]));
4457  mode = GET_MODE (mem);
4458
4459  alpha_pre_atomic_barrier (mod_s);
4460
4461  label1 = NULL_RTX;
4462  if (!is_weak)
4463    {
4464      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4465      emit_label (XEXP (label1, 0));
4466    }
4467  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4468
4469  emit_insn (gen_load_locked (mode, retval, mem));
4470
4471  x = gen_lowpart (DImode, retval);
4472  if (oldval == const0_rtx)
4473    {
4474      emit_move_insn (cond, const0_rtx);
4475      x = gen_rtx_NE (DImode, x, const0_rtx);
4476    }
4477  else
4478    {
4479      x = gen_rtx_EQ (DImode, x, oldval);
4480      emit_insn (gen_rtx_SET (cond, x));
4481      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4482    }
4483  emit_unlikely_jump (x, label2);
4484
4485  emit_move_insn (cond, newval);
4486  emit_insn (gen_store_conditional
4487	     (mode, cond, mem, gen_lowpart (mode, cond)));
4488
4489  if (!is_weak)
4490    {
4491      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4492      emit_unlikely_jump (x, label1);
4493    }
4494
4495  if (!is_mm_relaxed (mod_f))
4496    emit_label (XEXP (label2, 0));
4497
4498  alpha_post_atomic_barrier (mod_s);
4499
4500  if (is_mm_relaxed (mod_f))
4501    emit_label (XEXP (label2, 0));
4502}
4503
4504void
4505alpha_expand_compare_and_swap_12 (rtx operands[])
4506{
4507  rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4508  machine_mode mode;
4509  rtx addr, align, wdst;
4510
4511  cond = operands[0];
4512  dst = operands[1];
4513  mem = operands[2];
4514  oldval = operands[3];
4515  newval = operands[4];
4516  is_weak = operands[5];
4517  mod_s = operands[6];
4518  mod_f = operands[7];
4519  mode = GET_MODE (mem);
4520
4521  /* We forced the address into a register via mem_noofs_operand.  */
4522  addr = XEXP (mem, 0);
4523  gcc_assert (register_operand (addr, DImode));
4524
4525  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4526			       NULL_RTX, 1, OPTAB_DIRECT);
4527
4528  oldval = convert_modes (DImode, mode, oldval, 1);
4529
4530  if (newval != const0_rtx)
4531    newval = emit_insxl (mode, newval, addr);
4532
4533  wdst = gen_reg_rtx (DImode);
4534  emit_insn (gen_atomic_compare_and_swap_1
4535	     (mode, cond, wdst, mem, oldval, newval, align,
4536	      is_weak, mod_s, mod_f));
4537
4538  emit_move_insn (dst, gen_lowpart (mode, wdst));
4539}
4540
4541void
4542alpha_split_compare_and_swap_12 (rtx operands[])
4543{
4544  rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4545  machine_mode mode;
4546  bool is_weak;
4547  enum memmodel mod_s, mod_f;
4548  rtx label1, label2, mem, addr, width, mask, x;
4549
4550  cond = operands[0];
4551  dest = operands[1];
4552  orig_mem = operands[2];
4553  oldval = operands[3];
4554  newval = operands[4];
4555  align = operands[5];
4556  is_weak = (operands[6] != const0_rtx);
4557  mod_s = memmodel_from_int (INTVAL (operands[7]));
4558  mod_f = memmodel_from_int (INTVAL (operands[8]));
4559  scratch = operands[9];
4560  mode = GET_MODE (orig_mem);
4561  addr = XEXP (orig_mem, 0);
4562
4563  mem = gen_rtx_MEM (DImode, align);
4564  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4565  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4566    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4567
4568  alpha_pre_atomic_barrier (mod_s);
4569
4570  label1 = NULL_RTX;
4571  if (!is_weak)
4572    {
4573      label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4574      emit_label (XEXP (label1, 0));
4575    }
4576  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4577
4578  emit_insn (gen_load_locked (DImode, scratch, mem));
4579
4580  width = GEN_INT (GET_MODE_BITSIZE (mode));
4581  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4582  emit_insn (gen_extxl (dest, scratch, width, addr));
4583
4584  if (oldval == const0_rtx)
4585    {
4586      emit_move_insn (cond, const0_rtx);
4587      x = gen_rtx_NE (DImode, dest, const0_rtx);
4588    }
4589  else
4590    {
4591      x = gen_rtx_EQ (DImode, dest, oldval);
4592      emit_insn (gen_rtx_SET (cond, x));
4593      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4594    }
4595  emit_unlikely_jump (x, label2);
4596
4597  emit_insn (gen_mskxl (cond, scratch, mask, addr));
4598
4599  if (newval != const0_rtx)
4600    emit_insn (gen_iordi3 (cond, cond, newval));
4601
4602  emit_insn (gen_store_conditional (DImode, cond, mem, cond));
4603
4604  if (!is_weak)
4605    {
4606      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4607      emit_unlikely_jump (x, label1);
4608    }
4609
4610  if (!is_mm_relaxed (mod_f))
4611    emit_label (XEXP (label2, 0));
4612
4613  alpha_post_atomic_barrier (mod_s);
4614
4615  if (is_mm_relaxed (mod_f))
4616    emit_label (XEXP (label2, 0));
4617}
4618
4619/* Expand an atomic exchange operation.  */
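/* Roughly, for a DImode MEM the emitted loop is:

	mb			# if the memory model requires it
    1:	ldq_l	retval,0(mem)
	mov	val,scratch
	stq_c	scratch,0(mem)
	beq	scratch,1b	# unlikely: retry if the reservation was lost
	mb			# if the memory model requires it  */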
4620
4621void
4622alpha_split_atomic_exchange (rtx operands[])
4623{
4624  rtx retval, mem, val, scratch;
4625  enum memmodel model;
4626  machine_mode mode;
4627  rtx label, x, cond;
4628
4629  retval = operands[0];
4630  mem = operands[1];
4631  val = operands[2];
4632  model = (enum memmodel) INTVAL (operands[3]);
4633  scratch = operands[4];
4634  mode = GET_MODE (mem);
4635  cond = gen_lowpart (DImode, scratch);
4636
4637  alpha_pre_atomic_barrier (model);
4638
4639  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4640  emit_label (XEXP (label, 0));
4641
4642  emit_insn (gen_load_locked (mode, retval, mem));
4643  emit_move_insn (scratch, val);
4644  emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4645
4646  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4647  emit_unlikely_jump (x, label);
4648
4649  alpha_post_atomic_barrier (model);
4650}
4651
4652void
4653alpha_expand_atomic_exchange_12 (rtx operands[])
4654{
4655  rtx dst, mem, val, model;
4656  machine_mode mode;
4657  rtx addr, align, wdst;
4658
4659  dst = operands[0];
4660  mem = operands[1];
4661  val = operands[2];
4662  model = operands[3];
4663  mode = GET_MODE (mem);
4664
4665  /* We forced the address into a register via mem_noofs_operand.  */
4666  addr = XEXP (mem, 0);
4667  gcc_assert (register_operand (addr, DImode));
4668
4669  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4670			       NULL_RTX, 1, OPTAB_DIRECT);
4671
4672  /* Insert val into the correct byte location within the word.  */
4673  if (val != const0_rtx)
4674    val = emit_insxl (mode, val, addr);
4675
4676  wdst = gen_reg_rtx (DImode);
4677  emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model));
4678
4679  emit_move_insn (dst, gen_lowpart (mode, wdst));
4680}
4681
4682void
4683alpha_split_atomic_exchange_12 (rtx operands[])
4684{
4685  rtx dest, orig_mem, addr, val, align, scratch;
4686  rtx label, mem, width, mask, x;
4687  machine_mode mode;
4688  enum memmodel model;
4689
4690  dest = operands[0];
4691  orig_mem = operands[1];
4692  val = operands[2];
4693  align = operands[3];
4694  model = (enum memmodel) INTVAL (operands[4]);
4695  scratch = operands[5];
4696  mode = GET_MODE (orig_mem);
4697  addr = XEXP (orig_mem, 0);
4698
4699  mem = gen_rtx_MEM (DImode, align);
4700  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4701  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4702    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4703
4704  alpha_pre_atomic_barrier (model);
4705
4706  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4707  emit_label (XEXP (label, 0));
4708
4709  emit_insn (gen_load_locked (DImode, scratch, mem));
4710
4711  width = GEN_INT (GET_MODE_BITSIZE (mode));
4712  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4713  emit_insn (gen_extxl (dest, scratch, width, addr));
4714  emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4715  if (val != const0_rtx)
4716    emit_insn (gen_iordi3 (scratch, scratch, val));
4717
4718  emit_insn (gen_store_conditional (DImode, scratch, mem, scratch));
4719
4720  x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4721  emit_unlikely_jump (x, label);
4722
4723  alpha_post_atomic_barrier (model);
4724}
4725
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency of type DEP_TYPE between INSN and DEP_INSN.  COST is the
   current cost.  */
4728
4729static int
4730alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4731		   unsigned int)
4732{
4733  enum attr_type dep_insn_type;
4734
4735  /* If the dependence is an anti-dependence, there is no cost.  For an
4736     output dependence, there is sometimes a cost, but it doesn't seem
4737     worth handling those few cases.  */
4738  if (dep_type != 0)
4739    return cost;
4740
4741  /* If we can't recognize the insns, we can't really do anything.  */
4742  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4743    return cost;
4744
4745  dep_insn_type = get_attr_type (dep_insn);
4746
4747  /* Bring in the user-defined memory latency.  */
4748  if (dep_insn_type == TYPE_ILD
4749      || dep_insn_type == TYPE_FLD
4750      || dep_insn_type == TYPE_LDSYM)
    cost += alpha_memory_latency - 1;
4752
4753  /* Everything else handled in DFA bypasses now.  */
4754
4755  return cost;
4756}
4757
4758/* The number of instructions that can be issued per cycle.  */
4759
4760static int
4761alpha_issue_rate (void)
4762{
4763  return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4764}
4765
4766/* How many alternative schedules to try.  This should be as wide as the
4767   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.
4769
4770   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4771   alternative schedules.  For EV5, we can choose between E0/E1 and
4772   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */
4773
4774static int
4775alpha_multipass_dfa_lookahead (void)
4776{
4777  return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4778}
4779
4780/* Machine-specific function data.  */
4781
4782struct GTY(()) alpha_links;
4783
4784struct GTY(()) machine_function
4785{
4786  unsigned HOST_WIDE_INT sa_mask;
4787  HOST_WIDE_INT sa_size;
4788  HOST_WIDE_INT frame_size;
4789
4790  /* For flag_reorder_blocks_and_partition.  */
4791  rtx gp_save_rtx;
4792
4793  /* For VMS condition handlers.  */
4794  bool uses_condition_handler;
4795
4796  /* Linkage entries.  */
4797  hash_map<nofree_string_hash, alpha_links *> *links;
4798};
4799
4800/* How to allocate a 'struct machine_function'.  */
4801
4802static struct machine_function *
4803alpha_init_machine_status (void)
4804{
4805  return ggc_cleared_alloc<machine_function> ();
4806}
4807
4808/* Support for frame based VMS condition handlers.  */
4809
4810/* A VMS condition handler may be established for a function with a call to
4811   __builtin_establish_vms_condition_handler, and cancelled with a call to
4812   __builtin_revert_vms_condition_handler.
4813
4814   The VMS Condition Handling Facility knows about the existence of a handler
   from the procedure descriptor .handler field.  As the VMS native compilers
   do, we store the user-specified handler's address at a fixed location in the
4817   stack frame and point the procedure descriptor at a common wrapper which
4818   fetches the real handler's address and issues an indirect call.
4819
4820   The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4821
4822   We force the procedure kind to PT_STACK, and the fixed frame location is
4823   fp+8, just before the register save area. We use the handler_data field in
4824   the procedure descriptor to state the fp offset at which the installed
4825   handler address can be found.  */
4826
4827#define VMS_COND_HANDLER_FP_OFFSET 8
4828
4829/* Expand code to store the currently installed user VMS condition handler
4830   into TARGET and install HANDLER as the new condition handler.  */
4831
4832void
4833alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4834{
4835  rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4836					    VMS_COND_HANDLER_FP_OFFSET);
4837
4838  rtx handler_slot
4839    = gen_rtx_MEM (DImode, handler_slot_address);
4840
4841  emit_move_insn (target, handler_slot);
4842  emit_move_insn (handler_slot, handler);
4843
4844  /* Notify the start/prologue/epilogue emitters that the condition handler
4845     slot is needed.  In addition to reserving the slot space, this will force
     the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
     use above is correct.  */
4848  cfun->machine->uses_condition_handler = true;
4849}
4850
4851/* Expand code to store the current VMS condition handler into TARGET and
4852   nullify it.  */
4853
4854void
4855alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4856{
4857  /* We implement this by establishing a null condition handler, with the tiny
     side effect of setting uses_condition_handler.  This is slightly
     pessimistic if no actual builtin_establish call is ever issued, but that
     is not a real problem and is not expected to happen anyway.  */
4861
4862  alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4863}
4864
4865/* Functions to save and restore alpha_return_addr_rtx.  */
4866
4867/* Start the ball rolling with RETURN_ADDR_RTX.  */
4868
4869rtx
4870alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4871{
4872  if (count != 0)
4873    return const0_rtx;
4874
4875  return get_hard_reg_initial_val (Pmode, REG_RA);
4876}
4877
4878/* Return or create a memory slot containing the gp value for the current
4879   function.  Needed only if TARGET_LD_BUGGY_LDGP.  */
4880
4881rtx
4882alpha_gp_save_rtx (void)
4883{
4884  rtx_insn *seq;
4885  rtx m = cfun->machine->gp_save_rtx;
4886
4887  if (m == NULL)
4888    {
4889      start_sequence ();
4890
4891      m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4892      m = validize_mem (m);
4893      emit_move_insn (m, pic_offset_table_rtx);
4894
4895      seq = get_insns ();
4896      end_sequence ();
4897
4898      /* We used to simply emit the sequence after entry_of_function.
4899	 However this breaks the CFG if the first instruction in the
4900	 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4901	 label.  Emit the sequence properly on the edge.  We are only
4902	 invoked from dw2_build_landing_pads and finish_eh_generation
4903	 will call commit_edge_insertions thanks to a kludge.  */
4904      insert_insn_on_edge (seq,
4905			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4906
4907      cfun->machine->gp_save_rtx = m;
4908    }
4909
4910  return m;
4911}
4912
4913static void
4914alpha_instantiate_decls (void)
4915{
4916  if (cfun->machine->gp_save_rtx != NULL_RTX)
4917    instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4918}
4919
4920static int
4921alpha_ra_ever_killed (void)
4922{
4923  rtx_insn *top;
4924
4925  if (!has_hard_reg_initial_val (Pmode, REG_RA))
4926    return (int)df_regs_ever_live_p (REG_RA);
4927
4928  push_topmost_sequence ();
4929  top = get_insns ();
4930  pop_topmost_sequence ();
4931
4932  return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
4933}
4934
4935
4936/* Return the trap mode suffix applicable to the current
4937   instruction, or NULL.  */
4938
4939static const char *
4940get_trap_mode_suffix (void)
4941{
4942  enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4943
4944  switch (s)
4945    {
4946    case TRAP_SUFFIX_NONE:
4947      return NULL;
4948
4949    case TRAP_SUFFIX_SU:
4950      if (alpha_fptm >= ALPHA_FPTM_SU)
4951	return "su";
4952      return NULL;
4953
4954    case TRAP_SUFFIX_SUI:
4955      if (alpha_fptm >= ALPHA_FPTM_SUI)
4956	return "sui";
4957      return NULL;
4958
4959    case TRAP_SUFFIX_V_SV:
4960      switch (alpha_fptm)
4961	{
4962	case ALPHA_FPTM_N:
4963	  return NULL;
4964	case ALPHA_FPTM_U:
4965	  return "v";
4966	case ALPHA_FPTM_SU:
4967	case ALPHA_FPTM_SUI:
4968	  return "sv";
4969	default:
4970	  gcc_unreachable ();
4971	}
4972
4973    case TRAP_SUFFIX_V_SV_SVI:
4974      switch (alpha_fptm)
4975	{
4976	case ALPHA_FPTM_N:
4977	  return NULL;
4978	case ALPHA_FPTM_U:
4979	  return "v";
4980	case ALPHA_FPTM_SU:
4981	  return "sv";
4982	case ALPHA_FPTM_SUI:
4983	  return "svi";
4984	default:
4985	  gcc_unreachable ();
4986	}
4987      break;
4988
4989    case TRAP_SUFFIX_U_SU_SUI:
4990      switch (alpha_fptm)
4991	{
4992	case ALPHA_FPTM_N:
4993	  return NULL;
4994	case ALPHA_FPTM_U:
4995	  return "u";
4996	case ALPHA_FPTM_SU:
4997	  return "su";
4998	case ALPHA_FPTM_SUI:
4999	  return "sui";
5000	default:
5001	  gcc_unreachable ();
5002	}
5003      break;
5004
5005    default:
5006      gcc_unreachable ();
5007    }
5008  gcc_unreachable ();
5009}
5010
5011/* Return the rounding mode suffix applicable to the current
5012   instruction, or NULL.  */
5013
5014static const char *
5015get_round_mode_suffix (void)
5016{
5017  enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5018
5019  switch (s)
5020    {
5021    case ROUND_SUFFIX_NONE:
5022      return NULL;
5023    case ROUND_SUFFIX_NORMAL:
5024      switch (alpha_fprm)
5025	{
5026	case ALPHA_FPRM_NORM:
5027	  return NULL;
5028	case ALPHA_FPRM_MINF:
5029	  return "m";
5030	case ALPHA_FPRM_CHOP:
5031	  return "c";
5032	case ALPHA_FPRM_DYN:
5033	  return "d";
5034	default:
5035	  gcc_unreachable ();
5036	}
5037      break;
5038
5039    case ROUND_SUFFIX_C:
5040      return "c";
5041
5042    default:
5043      gcc_unreachable ();
5044    }
5045  gcc_unreachable ();
5046}
5047
5048/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5049
5050static bool
5051alpha_print_operand_punct_valid_p (unsigned char code)
5052{
5053  return (code == '/' || code == ',' || code == '-' || code == '~'
5054	  || code == '#' || code == '*' || code == '&');
5055}
5056
5057/* Implement TARGET_PRINT_OPERAND.  The alpha-specific
5058   operand codes are documented below.  */
5059
5060static void
5061alpha_print_operand (FILE *file, rtx x, int code)
5062{
5063  int i;
5064
5065  switch (code)
5066    {
5067    case '~':
5068      /* Print the assembler name of the current function.  */
5069      assemble_name (file, alpha_fnname);
5070      break;
5071
5072    case '&':
5073      if (const char *name = get_some_local_dynamic_name ())
5074	assemble_name (file, name);
5075      else
5076	output_operand_lossage ("'%%&' used without any "
5077				"local dynamic TLS references");
5078      break;
5079
5080    case '/':
5081      /* Generates the instruction suffix.  The TRAP_SUFFIX and ROUND_SUFFIX
5082	 attributes are examined to determine what is appropriate.  */
5083      {
5084	const char *trap = get_trap_mode_suffix ();
5085	const char *round = get_round_mode_suffix ();
5086
5087	if (trap || round)
5088	  fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5089	break;
5090      }
5091
5092    case ',':
5093      /* Generates single precision suffix for floating point
5094	 instructions (s for IEEE, f for VAX).  */
5095      fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5096      break;
5097
5098    case '-':
5099      /* Generates double precision suffix for floating point
5100	 instructions (t for IEEE, g for VAX).  */
5101      fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5102      break;
5103
5104    case '#':
5105      if (alpha_this_literal_sequence_number == 0)
5106	alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5107      fprintf (file, "%d", alpha_this_literal_sequence_number);
5108      break;
5109
5110    case '*':
5111      if (alpha_this_gpdisp_sequence_number == 0)
5112	alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5113      fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5114      break;
5115
5116    case 'J':
5117      {
5118	const char *lituse;
5119
5120        if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5121	  {
5122	    x = XVECEXP (x, 0, 0);
5123	    lituse = "lituse_tlsgd";
5124	  }
5125	else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5126	  {
5127	    x = XVECEXP (x, 0, 0);
5128	    lituse = "lituse_tlsldm";
5129	  }
5130	else if (CONST_INT_P (x))
5131	  lituse = "lituse_jsr";
5132	else
5133	  {
5134	    output_operand_lossage ("invalid %%J value");
5135	    break;
5136	  }
5137
5138	if (x != const0_rtx)
5139	  fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5140      }
5141      break;
5142
5143    case 'j':
5144      {
5145	const char *lituse;
5146
5147#ifdef HAVE_AS_JSRDIRECT_RELOCS
5148	lituse = "lituse_jsrdirect";
5149#else
5150	lituse = "lituse_jsr";
5151#endif
5152
5153	gcc_assert (INTVAL (x) != 0);
5154	fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5155      }
5156      break;
5157    case 'r':
5158      /* If this operand is the constant zero, write it as "$31".  */
5159      if (REG_P (x))
5160	fprintf (file, "%s", reg_names[REGNO (x)]);
5161      else if (x == CONST0_RTX (GET_MODE (x)))
5162	fprintf (file, "$31");
5163      else
5164	output_operand_lossage ("invalid %%r value");
5165      break;
5166
5167    case 'R':
5168      /* Similar, but for floating-point.  */
5169      if (REG_P (x))
5170	fprintf (file, "%s", reg_names[REGNO (x)]);
5171      else if (x == CONST0_RTX (GET_MODE (x)))
5172	fprintf (file, "$f31");
5173      else
5174	output_operand_lossage ("invalid %%R value");
5175      break;
5176
5177    case 'N':
5178      /* Write the 1's complement of a constant.  */
5179      if (!CONST_INT_P (x))
5180	output_operand_lossage ("invalid %%N value");
5181
5182      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5183      break;
5184
5185    case 'P':
5186      /* Write 1 << C, for a constant C.  */
5187      if (!CONST_INT_P (x))
5188	output_operand_lossage ("invalid %%P value");
5189
5190      fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5191      break;
5192
5193    case 'h':
5194      /* Write the high-order 16 bits of a constant, sign-extended.  */
5195      if (!CONST_INT_P (x))
5196	output_operand_lossage ("invalid %%h value");
5197
5198      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5199      break;
5200
5201    case 'L':
5202      /* Write the low-order 16 bits of a constant, sign-extended.  */
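      /* For example, 0x9000 prints as -28672; the subtraction folds the
	 sign bit of the low 16-bit half back in.  */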
5203      if (!CONST_INT_P (x))
5204	output_operand_lossage ("invalid %%L value");
5205
5206      fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5207	       (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5208      break;
5209
5210    case 'm':
5211      /* Write mask for ZAP insn.  */
5212      if (CONST_INT_P (x))
5213	{
5214	  HOST_WIDE_INT mask = 0, value = INTVAL (x);
5215
5216	  for (i = 0; i < 8; i++, value >>= 8)
5217	    if (value & 0xff)
5218	      mask |= (1 << i);
5219
5220	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5221	}
5222      else
5223	output_operand_lossage ("invalid %%m value");
5224      break;
5225
5226    case 'M':
5227      /* 'b', 'w', 'l', or 'q' as the value of the constant.  */
5228      if (!mode_width_operand (x, VOIDmode))
5229	output_operand_lossage ("invalid %%M value");
5230
5231      fprintf (file, "%s",
5232	       (INTVAL (x) == 8 ? "b"
5233		: INTVAL (x) == 16 ? "w"
5234		: INTVAL (x) == 32 ? "l"
5235		: "q"));
5236      break;
5237
5238    case 'U':
5239      /* Similar, except do it from the mask.  */
5240      if (CONST_INT_P (x))
5241	{
5242	  HOST_WIDE_INT value = INTVAL (x);
5243
5244	  if (value == 0xff)
5245	    {
5246	      fputc ('b', file);
5247	      break;
5248	    }
5249	  if (value == 0xffff)
5250	    {
5251	      fputc ('w', file);
5252	      break;
5253	    }
5254	  if (value == 0xffffffff)
5255	    {
5256	      fputc ('l', file);
5257	      break;
5258	    }
5259	  if (value == -1)
5260	    {
5261	      fputc ('q', file);
5262	      break;
5263	    }
5264	}
5265
5266      output_operand_lossage ("invalid %%U value");
5267      break;
5268
5269    case 's':
5270      /* Write the constant value divided by 8.  */
5271      if (!CONST_INT_P (x)
5272	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5273	  || (INTVAL (x) & 7) != 0)
5274	output_operand_lossage ("invalid %%s value");
5275
5276      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5277      break;
5278
5279    case 'C': case 'D': case 'c': case 'd':
5280      /* Write out comparison name.  */
5281      {
5282	enum rtx_code c = GET_CODE (x);
5283
5284        if (!COMPARISON_P (x))
5285	  output_operand_lossage ("invalid %%C value");
5286
5287	else if (code == 'D')
5288	  c = reverse_condition (c);
5289	else if (code == 'c')
5290	  c = swap_condition (c);
5291	else if (code == 'd')
5292	  c = swap_condition (reverse_condition (c));
5293
5294        if (c == LEU)
5295	  fprintf (file, "ule");
5296        else if (c == LTU)
5297	  fprintf (file, "ult");
5298	else if (c == UNORDERED)
5299	  fprintf (file, "un");
5300        else
5301	  fprintf (file, "%s", GET_RTX_NAME (c));
5302      }
5303      break;
5304
5305    case 'E':
5306      /* Write the divide or modulus operator.  */
5307      switch (GET_CODE (x))
5308	{
5309	case DIV:
5310	  fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5311	  break;
5312	case UDIV:
5313	  fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5314	  break;
5315	case MOD:
5316	  fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5317	  break;
5318	case UMOD:
5319	  fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5320	  break;
5321	default:
5322	  output_operand_lossage ("invalid %%E value");
5323	  break;
5324	}
5325      break;
5326
5327    case 'A':
5328      /* Write "_u" for unaligned access.  */
5329      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5330	fprintf (file, "_u");
5331      break;
5332
5333    case 0:
5334      if (REG_P (x))
5335	fprintf (file, "%s", reg_names[REGNO (x)]);
5336      else if (MEM_P (x))
5337	output_address (GET_MODE (x), XEXP (x, 0));
5338      else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5339	{
5340	  switch (XINT (XEXP (x, 0), 1))
5341	    {
5342	    case UNSPEC_DTPREL:
5343	    case UNSPEC_TPREL:
5344	      output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5345	      break;
5346	    default:
5347	      output_operand_lossage ("unknown relocation unspec");
5348	      break;
5349	    }
5350	}
5351      else
5352	output_addr_const (file, x);
5353      break;
5354
5355    default:
5356      output_operand_lossage ("invalid %%xn code");
5357    }
5358}
5359
5360/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
5361
5362static void
5363alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
5364{
5365  int basereg = 31;
5366  HOST_WIDE_INT offset = 0;
5367
5368  if (GET_CODE (addr) == AND)
5369    addr = XEXP (addr, 0);
5370
5371  if (GET_CODE (addr) == PLUS
5372      && CONST_INT_P (XEXP (addr, 1)))
5373    {
5374      offset = INTVAL (XEXP (addr, 1));
5375      addr = XEXP (addr, 0);
5376    }
5377
5378  if (GET_CODE (addr) == LO_SUM)
5379    {
5380      const char *reloc16, *reloclo;
5381      rtx op1 = XEXP (addr, 1);
5382
5383      if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5384	{
5385	  op1 = XEXP (op1, 0);
5386	  switch (XINT (op1, 1))
5387	    {
5388	    case UNSPEC_DTPREL:
5389	      reloc16 = NULL;
5390	      reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5391	      break;
5392	    case UNSPEC_TPREL:
5393	      reloc16 = NULL;
5394	      reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5395	      break;
5396	    default:
5397	      output_operand_lossage ("unknown relocation unspec");
5398	      return;
5399	    }
5400
5401	  output_addr_const (file, XVECEXP (op1, 0, 0));
5402	}
5403      else
5404	{
5405	  reloc16 = "gprel";
5406	  reloclo = "gprellow";
5407	  output_addr_const (file, op1);
5408	}
5409
5410      if (offset)
5411	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5412
5413      addr = XEXP (addr, 0);
5414      switch (GET_CODE (addr))
5415	{
5416	case REG:
5417	  basereg = REGNO (addr);
5418	  break;
5419
5420	case SUBREG:
5421	  basereg = subreg_regno (addr);
5422	  break;
5423
5424	default:
5425	  gcc_unreachable ();
5426	}
5427
5428      fprintf (file, "($%d)\t\t!%s", basereg,
5429	       (basereg == 29 ? reloc16 : reloclo));
5430      return;
5431    }
5432
5433  switch (GET_CODE (addr))
5434    {
5435    case REG:
5436      basereg = REGNO (addr);
5437      break;
5438
5439    case SUBREG:
5440      basereg = subreg_regno (addr);
5441      break;
5442
5443    case CONST_INT:
5444      offset = INTVAL (addr);
5445      break;
5446
5447    case SYMBOL_REF:
      gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5449      fprintf (file, "%s", XSTR (addr, 0));
5450      return;
5451
5452    case CONST:
      gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5454      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5455		  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5456      fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5457	       XSTR (XEXP (XEXP (addr, 0), 0), 0),
5458	       INTVAL (XEXP (XEXP (addr, 0), 1)));
5459      return;
5460
5461    default:
5462      output_operand_lossage ("invalid operand address");
5463      return;
5464    }
5465
5466  fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5467}
5468
5469/* Emit RTL insns to initialize the variable parts of a trampoline at
5470   M_TRAMP.  FNDECL is target function's decl.  CHAIN_VALUE is an rtx
5471   for the static chain value for the function.  */
5472
5473static void
5474alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5475{
5476  rtx fnaddr, mem, word1, word2;
5477
5478  fnaddr = XEXP (DECL_RTL (fndecl), 0);
5479
5480#ifdef POINTERS_EXTEND_UNSIGNED
5481  fnaddr = convert_memory_address (Pmode, fnaddr);
5482  chain_value = convert_memory_address (Pmode, chain_value);
5483#endif
5484
5485  if (TARGET_ABI_OPEN_VMS)
5486    {
5487      const char *fnname;
5488      char *trname;
5489
5490      /* Construct the name of the trampoline entry point.  */
5491      fnname = XSTR (fnaddr, 0);
5492      trname = (char *) alloca (strlen (fnname) + 5);
5493      strcpy (trname, fnname);
5494      strcat (trname, "..tr");
5495      fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5496      word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5497
5498      /* Trampoline (or "bounded") procedure descriptor is constructed from
	 the function's procedure descriptor with certain fields zeroed in
	 accordance with the VMS calling standard.  This is stored in the
	 first quadword.  */
5501      word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5502      word1 = expand_and (DImode, word1,
5503			  GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5504			  NULL);
5505    }
5506  else
5507    {
5508      /* These 4 instructions are:
5509	    ldq $1,24($27)
5510	    ldq $27,16($27)
5511	    jmp $31,($27),0
5512	    nop
5513	 We don't bother setting the HINT field of the jump; the nop
5514	 is merely there for padding.  */
5515      word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5516      word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5517    }
5518
5519  /* Store the first two words, as computed above.  */
5520  mem = adjust_address (m_tramp, DImode, 0);
5521  emit_move_insn (mem, word1);
5522  mem = adjust_address (m_tramp, DImode, 8);
5523  emit_move_insn (mem, word2);
5524
5525  /* Store function address and static chain value.  */
5526  mem = adjust_address (m_tramp, Pmode, 16);
5527  emit_move_insn (mem, fnaddr);
5528  mem = adjust_address (m_tramp, Pmode, 24);
5529  emit_move_insn (mem, chain_value);
5530
5531  if (TARGET_ABI_OSF)
5532    {
5533      emit_insn (gen_imb ());
5534#ifdef HAVE_ENABLE_EXECUTE_STACK
5535      emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5536			 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
5537#endif
5538    }
5539}
5540
5541/* Determine where to put an argument to a function.
5542   Value is zero to push the argument on the stack,
5543   or a hard register in which to store the argument.
5544
5545   CUM is a variable of type CUMULATIVE_ARGS which gives info about
5546    the preceding args and about the function being called.
5547   ARG is a description of the argument.
5548
5549   On Alpha the first 6 words of args are normally in registers
5550   and the rest are pushed.  */
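/* For example (purely illustrative), on OSF a call to

	extern void f (long a, double b, char *c);

   passes A in $16, B in $f17 and C in $18: integer and FP arguments share
   a single slot index, drawn from $16-$21 or $f16-$f21 below.  */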
5551
5552static rtx
5553alpha_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5554{
5555  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5556  int basereg;
5557  int num_args;
5558
5559  /* Don't get confused and pass small structures in FP registers.  */
5560  if (arg.aggregate_type_p ())
5561    basereg = 16;
5562  else
5563    {
5564      /* With alpha_split_complex_arg, we shouldn't see any raw complex
5565	 values here.  */
5566      gcc_checking_assert (!COMPLEX_MODE_P (arg.mode));
5567
5568      /* Set up defaults for FP operands passed in FP registers, and
5569	 integral operands passed in integer registers.  */
5570      if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT)
5571	basereg = 32 + 16;
5572      else
5573	basereg = 16;
5574    }
5575
5576  /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5577     the two platforms, so we can't avoid conditional compilation.  */
5578#if TARGET_ABI_OPEN_VMS
5579    {
5580      if (arg.end_marker_p ())
5581	return alpha_arg_info_reg_val (*cum);
5582
5583      num_args = cum->num_args;
5584      if (num_args >= 6
5585	  || targetm.calls.must_pass_in_stack (arg))
5586	return NULL_RTX;
5587    }
5588#elif TARGET_ABI_OSF
5589    {
5590      if (*cum >= 6)
5591	return NULL_RTX;
5592      num_args = *cum;
5593
5594      if (arg.end_marker_p ())
5595	basereg = 16;
5596      else if (targetm.calls.must_pass_in_stack (arg))
5597	return NULL_RTX;
5598    }
5599#else
5600#error Unhandled ABI
5601#endif
5602
5603  return gen_rtx_REG (arg.mode, num_args + basereg);
5604}
5605
5606/* Update the data in CUM to advance over argument ARG.  */
5607
5608static void
5609alpha_function_arg_advance (cumulative_args_t cum_v,
5610			    const function_arg_info &arg)
5611{
5612  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5613  bool onstack = targetm.calls.must_pass_in_stack (arg);
5614  int increment = onstack ? 6 : ALPHA_ARG_SIZE (arg.mode, arg.type);
5615
5616#if TARGET_ABI_OSF
5617  *cum += increment;
5618#else
5619  if (!onstack && cum->num_args < 6)
5620    cum->atypes[cum->num_args] = alpha_arg_type (arg.mode);
5621  cum->num_args += increment;
5622#endif
5623}
5624
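/* Implement TARGET_ARG_PARTIAL_BYTES.  Return the number of bytes of ARG
   passed in registers when ARG straddles the boundary after the sixth
   argument slot; the remainder goes on the stack.  For instance (purely
   illustrative), a 16-byte structure arriving when five slots are already
   in use gets 8 bytes in the last argument register and 8 bytes on the
   stack, so we return 8.  */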
5625static int
5626alpha_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5627{
5628  int words = 0;
5629  CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5630
5631#if TARGET_ABI_OPEN_VMS
5632  if (cum->num_args < 6
5633      && 6 < cum->num_args + ALPHA_ARG_SIZE (arg.mode, arg.type))
5634    words = 6 - cum->num_args;
5635#elif TARGET_ABI_OSF
5636  if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (arg.mode, arg.type))
5637    words = 6 - *cum;
5638#else
5639#error Unhandled ABI
5640#endif
5641
5642  return words * UNITS_PER_WORD;
5643}
5644
5645
5646/* Return true if TYPE must be returned in memory, instead of in registers.  */
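/* For instance (illustrative only), on OSF a 16-byte structure is returned
   in memory, while a _Complex float is returned in registers because it is
   judged by its 4-byte element size below; on OpenVMS an 8-byte record is
   returned by value.  */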
5647
5648static bool
5649alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5650{
5651  machine_mode mode = VOIDmode;
5652  int size;
5653
5654  if (type)
5655    {
5656      mode = TYPE_MODE (type);
5657
      /* All aggregates are returned in memory, except on OpenVMS where
	 records that fit in 64 bits should be returned by immediate value
	 as required by section 3.8.7.1 of the OpenVMS Calling Standard.  */
      if (TARGET_ABI_OPEN_VMS
	  && TREE_CODE (type) != ARRAY_TYPE
	  && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 8)
5664	return false;
5665
5666      if (AGGREGATE_TYPE_P (type))
5667	return true;
5668    }
5669
5670  size = GET_MODE_SIZE (mode);
5671  switch (GET_MODE_CLASS (mode))
5672    {
5673    case MODE_VECTOR_FLOAT:
5674      /* Pass all float vectors in memory, like an aggregate.  */
5675      return true;
5676
5677    case MODE_COMPLEX_FLOAT:
5678      /* We judge complex floats on the size of their element,
5679	 not the size of the whole type.  */
5680      size = GET_MODE_UNIT_SIZE (mode);
5681      break;
5682
5683    case MODE_INT:
5684    case MODE_FLOAT:
5685    case MODE_COMPLEX_INT:
5686    case MODE_VECTOR_INT:
5687      break;
5688
5689    default:
5690      /* ??? We get called on all sorts of random stuff from
5691	 aggregate_value_p.  We must return something, but it's not
5692	 clear what's safe to return.  Pretend it's a struct I
5693	 guess.  */
5694      return true;
5695    }
5696
5697  /* Otherwise types must fit in one register.  */
5698  return size > UNITS_PER_WORD;
5699}
5700
5701/* Return true if ARG should be passed by invisible reference.  */
5702
5703static bool
5704alpha_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
5705{
  /* Pass float and _Complex float variable arguments by reference.
     This avoids a 64-bit store from a FP register to a pretend args save
     area and a subsequent 32-bit load from the saved location back into
     a FP register.

     Note that 32-bit loads and stores to/from a FP register on alpha
     reorder bits to form a canonical 64-bit value in the FP register.
     This fact invalidates the compiler's assumption that a 32-bit FP
     value lives in the lower 32 bits of the passed 64-bit FP value, so
     loading the 32-bit value from the stored 64-bit location using a
     32-bit FP load is invalid on alpha.

     This introduces a sort of ABI incompatibility, but until _Float32
     was introduced, C-family languages promoted a 32-bit float variable
     arg to a 64-bit double, and it was not allowed to pass float as a
     variable argument.  Passing _Complex float as a variable argument
     never worked on alpha.  Thus, we have no backward compatibility
     issues to worry about, and passing unpromoted _Float32 and _Complex
     float as a variable argument will actually work in the future.  */
5723
5724  if (arg.mode == SFmode || arg.mode == SCmode)
5725    return !arg.named;
5726
5727  return arg.mode == TFmode || arg.mode == TCmode;
5728}
5729
5730/* Define how to find the value returned by a function.  VALTYPE is the
5731   data type of the value (as a tree).  If the precise function being
5732   called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5733   MODE is set instead of VALTYPE for libcalls.
5734
5735   On Alpha the value is found in $0 for integer functions and
5736   $f0 for floating-point functions.  */
5737
5738static rtx
5739alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5740			machine_mode mode)
5741{
5742  unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5743  enum mode_class mclass;
5744
5745  gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5746
5747  if (valtype)
5748    mode = TYPE_MODE (valtype);
5749
5750  mclass = GET_MODE_CLASS (mode);
5751  switch (mclass)
5752    {
5753    case MODE_INT:
5754      /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5755	 where we have them returning both SImode and DImode.  */
5756      if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5757        PROMOTE_MODE (mode, dummy, valtype);
5758      /* FALLTHRU */
5759
5760    case MODE_COMPLEX_INT:
5761    case MODE_VECTOR_INT:
5762      regnum = 0;
5763      break;
5764
5765    case MODE_FLOAT:
5766      regnum = 32;
5767      break;
5768
5769    case MODE_COMPLEX_FLOAT:
5770      {
5771	machine_mode cmode = GET_MODE_INNER (mode);
5772
5773	return gen_rtx_PARALLEL
5774	  (VOIDmode,
5775	   gen_rtvec (2,
5776		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5777				         const0_rtx),
5778		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5779				         GEN_INT (GET_MODE_SIZE (cmode)))));
5780      }
5781
5782    case MODE_RANDOM:
5783      /* We should only reach here for BLKmode on VMS.  */
5784      gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5785      regnum = 0;
5786      break;
5787
5788    default:
5789      gcc_unreachable ();
5790    }
5791
5792  return gen_rtx_REG (mode, regnum);
5793}
5794
5795/* Implement TARGET_FUNCTION_VALUE.  */
5796
5797static rtx
5798alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
5799		      bool /*outgoing*/)
5800{
5801  return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
5802}
5803
5804/* Implement TARGET_LIBCALL_VALUE.  */
5805
5806static rtx
5807alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
5808{
5809  return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
5810}
5811
5812/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
5813
   On the Alpha, $0, $1 and $f0, $f1 are the only registers thus used.  */
5815
5816static bool
5817alpha_function_value_regno_p (const unsigned int regno)
5818{
5819  return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
5820}
5821
5822/* TCmode complex values are passed by invisible reference.  We
5823   should not split these values.  */
5824
5825static bool
5826alpha_split_complex_arg (const_tree type)
5827{
5828  return TYPE_MODE (type) != TCmode;
5829}
5830
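/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  On OSF the record built below
   is effectively

	struct __va_list_tag
	{
	  void *__base;		(start of the argument save area)
	  int __offset;		(number of bytes consumed so far)
	  int <pad>;		(dummy field, present for alignment only)
	};

   while on OpenVMS a plain pointer suffices.  */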
5831static tree
5832alpha_build_builtin_va_list (void)
5833{
5834  tree base, ofs, space, record, type_decl;
5835
5836  if (TARGET_ABI_OPEN_VMS)
5837    return ptr_type_node;
5838
5839  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5840  type_decl = build_decl (BUILTINS_LOCATION,
5841			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
5842  TYPE_STUB_DECL (record) = type_decl;
5843  TYPE_NAME (record) = type_decl;
5844
5845  /* C++? SET_IS_AGGR_TYPE (record, 1); */
5846
5847  /* Dummy field to prevent alignment warnings.  */
5848  space = build_decl (BUILTINS_LOCATION,
5849		      FIELD_DECL, NULL_TREE, integer_type_node);
5850  DECL_FIELD_CONTEXT (space) = record;
5851  DECL_ARTIFICIAL (space) = 1;
5852  DECL_IGNORED_P (space) = 1;
5853
5854  ofs = build_decl (BUILTINS_LOCATION,
5855		    FIELD_DECL, get_identifier ("__offset"),
5856		    integer_type_node);
5857  DECL_FIELD_CONTEXT (ofs) = record;
5858  DECL_CHAIN (ofs) = space;
5859
5860  base = build_decl (BUILTINS_LOCATION,
5861		     FIELD_DECL, get_identifier ("__base"),
5862		     ptr_type_node);
5863  DECL_FIELD_CONTEXT (base) = record;
5864  DECL_CHAIN (base) = ofs;
5865
5866  TYPE_FIELDS (record) = base;
5867  layout_type (record);
5868
5869  va_list_gpr_counter_field = ofs;
5870  return record;
5871}
5872
5873#if TARGET_ABI_OSF
5874/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
5875   and constant additions.  */
5876
5877static gimple *
5878va_list_skip_additions (tree lhs)
5879{
5880  gimple  *stmt;
5881
5882  for (;;)
5883    {
5884      enum tree_code code;
5885
5886      stmt = SSA_NAME_DEF_STMT (lhs);
5887
5888      if (gimple_code (stmt) == GIMPLE_PHI)
5889	return stmt;
5890
5891      if (!is_gimple_assign (stmt)
5892	  || gimple_assign_lhs (stmt) != lhs)
5893	return NULL;
5894
5895      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5896	return stmt;
5897      code = gimple_assign_rhs_code (stmt);
5898      if (!CONVERT_EXPR_CODE_P (code)
5899	  && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5900	      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5901	      || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5902	return stmt;
5903
5904      lhs = gimple_assign_rhs1 (stmt);
5905    }
5906}
5907
5908/* Check if LHS = RHS statement is
5909   LHS = *(ap.__base + ap.__offset + cst)
5910   or
5911   LHS = *(ap.__base
5912	   + ((ap.__offset + cst <= 47)
5913	      ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
   If the former, indicate that GPR registers are needed;
   if the latter, indicate that FPR registers are needed.
5916
5917   Also look for LHS = (*ptr).field, where ptr is one of the forms
5918   listed above.
5919
   On alpha, cfun->va_list_gpr_size is used as the size of the needed
   regs and cfun->va_list_fpr_size is a bitmask, with bit 0 set if GPR
   registers are needed and bit 1 set if FPR registers are needed.
5923   Return true if va_list references should not be scanned for the
5924   current statement.  */
5925
5926static bool
5927alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
5928{
5929  tree base, offset, rhs;
5930  int offset_arg = 1;
5931  gimple *base_stmt;
5932
5933  if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5934      != GIMPLE_SINGLE_RHS)
5935    return false;
5936
5937  rhs = gimple_assign_rhs1 (stmt);
5938  while (handled_component_p (rhs))
5939    rhs = TREE_OPERAND (rhs, 0);
5940  if (TREE_CODE (rhs) != MEM_REF
5941      || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5942    return false;
5943
5944  stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5945  if (stmt == NULL
5946      || !is_gimple_assign (stmt)
5947      || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5948    return false;
5949
5950  base = gimple_assign_rhs1 (stmt);
5951  if (TREE_CODE (base) == SSA_NAME)
5952    {
5953      base_stmt = va_list_skip_additions (base);
5954      if (base_stmt
5955	  && is_gimple_assign (base_stmt)
5956	  && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5957	base = gimple_assign_rhs1 (base_stmt);
5958    }
5959
5960  if (TREE_CODE (base) != COMPONENT_REF
5961      || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5962    {
5963      base = gimple_assign_rhs2 (stmt);
5964      if (TREE_CODE (base) == SSA_NAME)
5965	{
5966	  base_stmt = va_list_skip_additions (base);
5967	  if (base_stmt
5968	      && is_gimple_assign (base_stmt)
5969	      && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5970	    base = gimple_assign_rhs1 (base_stmt);
5971	}
5972
5973      if (TREE_CODE (base) != COMPONENT_REF
5974	  || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5975	return false;
5976
5977      offset_arg = 0;
5978    }
5979
5980  base = get_base_address (base);
5981  if (TREE_CODE (base) != VAR_DECL
5982      || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
5983    return false;
5984
5985  offset = gimple_op (stmt, 1 + offset_arg);
5986  if (TREE_CODE (offset) == SSA_NAME)
5987    {
5988      gimple *offset_stmt = va_list_skip_additions (offset);
5989
5990      if (offset_stmt
5991	  && gimple_code (offset_stmt) == GIMPLE_PHI)
5992	{
5993	  HOST_WIDE_INT sub;
5994	  gimple *arg1_stmt, *arg2_stmt;
5995	  tree arg1, arg2;
5996	  enum tree_code code1, code2;
5997
5998	  if (gimple_phi_num_args (offset_stmt) != 2)
5999	    goto escapes;
6000
6001	  arg1_stmt
6002	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6003	  arg2_stmt
6004	    = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6005	  if (arg1_stmt == NULL
6006	      || !is_gimple_assign (arg1_stmt)
6007	      || arg2_stmt == NULL
6008	      || !is_gimple_assign (arg2_stmt))
6009	    goto escapes;
6010
6011	  code1 = gimple_assign_rhs_code (arg1_stmt);
6012	  code2 = gimple_assign_rhs_code (arg2_stmt);
6013	  if (code1 == COMPONENT_REF
6014	      && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6015	    /* Do nothing.  */;
6016	  else if (code2 == COMPONENT_REF
6017		   && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6018	    {
6019	      std::swap (arg1_stmt, arg2_stmt);
6020	      code2 = code1;
6021	    }
6022	  else
6023	    goto escapes;
6024
6025	  if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6026	    goto escapes;
6027
6028	  sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6029	  if (code2 == MINUS_EXPR)
6030	    sub = -sub;
6031	  if (sub < -48 || sub > -32)
6032	    goto escapes;
6033
6034	  arg1 = gimple_assign_rhs1 (arg1_stmt);
6035	  arg2 = gimple_assign_rhs1 (arg2_stmt);
6036	  if (TREE_CODE (arg2) == SSA_NAME)
6037	    {
6038	      arg2_stmt = va_list_skip_additions (arg2);
6039	      if (arg2_stmt == NULL
6040		  || !is_gimple_assign (arg2_stmt)
6041		  || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6042		goto escapes;
6043	      arg2 = gimple_assign_rhs1 (arg2_stmt);
6044	    }
6045	  if (arg1 != arg2)
6046	    goto escapes;
6047
6048	  if (TREE_CODE (arg1) != COMPONENT_REF
6049	      || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6050	      || get_base_address (arg1) != base)
6051	    goto escapes;
6052
6053	  /* Need floating point regs.  */
6054	  cfun->va_list_fpr_size |= 2;
6055	  return false;
6056	}
6057      if (offset_stmt
6058	  && is_gimple_assign (offset_stmt)
6059	  && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6060	offset = gimple_assign_rhs1 (offset_stmt);
6061    }
6062  if (TREE_CODE (offset) != COMPONENT_REF
6063      || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6064      || get_base_address (offset) != base)
6065    goto escapes;
6066  else
6067    /* Need general regs.  */
6068    cfun->va_list_fpr_size |= 1;
6069  return false;
6070
6071escapes:
6072  si->va_list_escapes = true;
6073  return false;
6074}
6075#endif
6076
/* Perform any actions needed for a function that is receiving a
   variable number of arguments.  */
6079
6080static void
6081alpha_setup_incoming_varargs (cumulative_args_t pcum,
6082			      const function_arg_info &arg,
6083			      int *pretend_size, int no_rtl)
6084{
6085  CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6086
6087  /* Skip the current argument.  */
6088  targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg);
6089
6090#if TARGET_ABI_OPEN_VMS
6091  /* For VMS, we allocate space for all 6 arg registers plus a count.
6092
6093     However, if NO registers need to be saved, don't allocate any space.
6094     This is not only because we won't need the space, but because AP
6095     includes the current_pretend_args_size and we don't want to mess up
6096     any ap-relative addresses already made.  */
6097  if (cum.num_args < 6)
6098    {
6099      if (!no_rtl)
6100	{
6101	  emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6102	  emit_insn (gen_arg_home ());
6103	}
6104      *pretend_size = 7 * UNITS_PER_WORD;
6105    }
6106#else
6107  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6108     only push those that are remaining.  However, if NO registers need to
6109     be saved, don't allocate any space.  This is not only because we won't
6110     need the space, but because AP includes the current_pretend_args_size
6111     and we don't want to mess up any ap-relative addresses already made.
6112
6113     If we are not to use the floating-point registers, save the integer
6114     registers where we would put the floating-point registers.  This is
6115     not the most efficient way to implement varargs with just one register
6116     class, but it isn't worth doing anything more efficient in this rare
6117     case.  */
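
  /* The resulting 96-byte save area, measured from the incoming argument
     pointer used below, is then laid out as

	  0 .. 47	$f16-$f21 (or $16-$21 again when FP regs are off)
	 48 .. 95	$16-$21
	 96 ..		arguments already passed on the stack

     which is the layout alpha_va_start expects.  */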
6118  if (cum >= 6)
6119    return;
6120
6121  if (!no_rtl)
6122    {
6123      int count;
6124      alias_set_type set = get_varargs_alias_set ();
6125      rtx tmp;
6126
6127      count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6128      if (count > 6 - cum)
6129	count = 6 - cum;
6130
6131      /* Detect whether integer registers or floating-point registers
6132	 are needed by the detected va_arg statements.  See above for
6133	 how these values are computed.  Note that the "escape" value
	 is VA_LIST_MAX_FPR_SIZE, which is 255 and therefore has both of
	 these bits set.  */
6136      gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6137
6138      if (cfun->va_list_fpr_size & 1)
6139	{
6140	  tmp = gen_rtx_MEM (BLKmode,
6141			     plus_constant (Pmode, virtual_incoming_args_rtx,
6142					    (cum + 6) * UNITS_PER_WORD));
6143	  MEM_NOTRAP_P (tmp) = 1;
6144	  set_mem_alias_set (tmp, set);
6145	  move_block_from_reg (16 + cum, tmp, count);
6146	}
6147
6148      if (cfun->va_list_fpr_size & 2)
6149	{
6150	  tmp = gen_rtx_MEM (BLKmode,
6151			     plus_constant (Pmode, virtual_incoming_args_rtx,
6152					    cum * UNITS_PER_WORD));
6153	  MEM_NOTRAP_P (tmp) = 1;
6154	  set_mem_alias_set (tmp, set);
6155	  move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6156	}
6157     }
6158  *pretend_size = 12 * UNITS_PER_WORD;
6159#endif
6160}
6161
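/* Implement TARGET_EXPAND_BUILTIN_VA_START.  On OSF this amounts to

	valist.__base   = incoming argument pointer + OFFSET;
	valist.__offset = NUM_ARGS * UNITS_PER_WORD;

   with OFFSET as computed below, while on VMS the single pointer is set
   just past the named arguments.  */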
6162static void
6163alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6164{
6165  HOST_WIDE_INT offset;
6166  tree t, offset_field, base_field;
6167
6168  if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6169    return;
6170
6171  /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6172     up by 48, storing fp arg registers in the first 48 bytes, and the
6173     integer arg registers in the next 48 bytes.  This is only done,
6174     however, if any integer registers need to be stored.
6175
6176     If no integer registers need be stored, then we must subtract 48
6177     in order to account for the integer arg registers which are counted
6178     in argsize above, but which are not actually stored on the stack.
6179     Must further be careful here about structures straddling the last
6180     integer argument register; that futzes with pretend_args_size,
6181     which changes the meaning of AP.  */
6182
6183  if (NUM_ARGS < 6)
6184    offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6185  else
6186    offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6187
6188  if (TARGET_ABI_OPEN_VMS)
6189    {
6190      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6191      t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6192      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6193      TREE_SIDE_EFFECTS (t) = 1;
6194      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6195    }
6196  else
6197    {
6198      base_field = TYPE_FIELDS (TREE_TYPE (valist));
6199      offset_field = DECL_CHAIN (base_field);
6200
6201      base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6202			   valist, base_field, NULL_TREE);
6203      offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6204			     valist, offset_field, NULL_TREE);
6205
6206      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6207      t = fold_build_pointer_plus_hwi (t, offset);
6208      t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6209      TREE_SIDE_EFFECTS (t) = 1;
6210      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6211
6212      t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6213      t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6214      TREE_SIDE_EFFECTS (t) = 1;
6215      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6216    }
6217}
6218
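/* Subroutine of alpha_gimplify_va_arg.  Emit GIMPLE, appended to *PRE_P,
   that fetches the next argument of TYPE from the va_list described by
   BASE and OFFSET, advancing OFFSET by the argument size rounded up to a
   multiple of 8 bytes.  Return the loaded value (a stable indirection).  */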
6219static tree
6220alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6221			 gimple_seq *pre_p)
6222{
6223  tree type_size, ptr_type, addend, t, addr;
6224  gimple_seq internal_post;
6225
6226  /* If the type could not be passed in registers, skip the block
6227     reserved for the registers.  */
6228  if (must_pass_va_arg_in_stack (type))
6229    {
6230      t = build_int_cst (TREE_TYPE (offset), 6*8);
6231      gimplify_assign (offset,
6232		       build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6233		       pre_p);
6234    }
6235
6236  addend = offset;
6237  ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6238
6239  if (TREE_CODE (type) == COMPLEX_TYPE)
6240    {
6241      tree real_part, imag_part, real_temp;
6242
6243      real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6244					   offset, pre_p);
6245
6246      /* Copy the value into a new temporary, lest the formal temporary
6247	 be reused out from under us.  */
6248      real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6249
6250      imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6251					   offset, pre_p);
6252
6253      return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6254    }
6255  else if (TREE_CODE (type) == REAL_TYPE)
6256    {
6257      tree fpaddend, cond, fourtyeight;
6258
6259      fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6260      fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6261			      addend, fourtyeight);
6262      cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6263      addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6264			    fpaddend, addend);
6265    }
6266
6267  /* Build the final address and force that value into a temporary.  */
6268  addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6269  internal_post = NULL;
6270  gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6271  gimple_seq_add_seq (pre_p, internal_post);
6272
6273  /* Update the offset field.  */
6274  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6275  if (type_size == NULL || TREE_OVERFLOW (type_size))
6276    t = size_zero_node;
6277  else
6278    {
6279      t = size_binop (PLUS_EXPR, type_size, size_int (7));
6280      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6281      t = size_binop (MULT_EXPR, t, size_int (8));
6282    }
6283  t = fold_convert (TREE_TYPE (offset), t);
6284  gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6285      		   pre_p);
6286
6287  return build_va_arg_indirect_ref (addr);
6288}
6289
6290static tree
6291alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6292		       gimple_seq *post_p)
6293{
6294  tree offset_field, base_field, offset, base, t, r;
6295  bool indirect;
6296
6297  if (TARGET_ABI_OPEN_VMS)
6298    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6299
6300  base_field = TYPE_FIELDS (va_list_type_node);
6301  offset_field = DECL_CHAIN (base_field);
6302  base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6303		       valist, base_field, NULL_TREE);
6304  offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6305			 valist, offset_field, NULL_TREE);
6306
6307  /* Pull the fields of the structure out into temporaries.  Since we never
6308     modify the base field, we can use a formal temporary.  Sign-extend the
6309     offset field so that it's the proper width for pointer arithmetic.  */
6310  base = get_formal_tmp_var (base_field, pre_p);
6311
6312  t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6313  offset = get_initialized_tmp_var (t, pre_p, NULL);
6314
6315  indirect = pass_va_arg_by_reference (type);
6316
6317  if (indirect)
6318    {
6319      if (TREE_CODE (type) == COMPLEX_TYPE
6320	  && targetm.calls.split_complex_arg (type))
6321	{
6322	  tree real_part, imag_part, real_temp;
6323
6324	  tree ptr_type = build_pointer_type_for_mode (TREE_TYPE (type),
6325						       ptr_mode, true);
6326
6327	  real_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6328					       offset, pre_p);
6329	  real_part = build_va_arg_indirect_ref (real_part);
6330
6331	  /* Copy the value into a new temporary, lest the formal temporary
6332	     be reused out from under us.  */
6333	  real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6334
6335	  imag_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6336					       offset, pre_p);
6337	  imag_part = build_va_arg_indirect_ref (imag_part);
6338
6339	  r = build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6340
6341	  /* Stuff the offset temporary back into its field.  */
6342	  gimplify_assign (unshare_expr (offset_field),
6343			   fold_convert (TREE_TYPE (offset_field), offset),
6344			   pre_p);
6345	  return r;
6346	}
6347      else
6348	type = build_pointer_type_for_mode (type, ptr_mode, true);
6349    }
6350
6351  /* Find the value.  Note that this will be a stable indirection, or
6352     a composite of stable indirections in the case of complex.  */
6353  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6354
6355  /* Stuff the offset temporary back into its field.  */
6356  gimplify_assign (unshare_expr (offset_field),
6357		   fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6358
6359  if (indirect)
6360    r = build_va_arg_indirect_ref (r);
6361
6362  return r;
6363}
6364
6365/* Builtins.  */
6366
6367enum alpha_builtin
6368{
6369  ALPHA_BUILTIN_CMPBGE,
6370  ALPHA_BUILTIN_EXTBL,
6371  ALPHA_BUILTIN_EXTWL,
6372  ALPHA_BUILTIN_EXTLL,
6373  ALPHA_BUILTIN_EXTQL,
6374  ALPHA_BUILTIN_EXTWH,
6375  ALPHA_BUILTIN_EXTLH,
6376  ALPHA_BUILTIN_EXTQH,
6377  ALPHA_BUILTIN_INSBL,
6378  ALPHA_BUILTIN_INSWL,
6379  ALPHA_BUILTIN_INSLL,
6380  ALPHA_BUILTIN_INSQL,
6381  ALPHA_BUILTIN_INSWH,
6382  ALPHA_BUILTIN_INSLH,
6383  ALPHA_BUILTIN_INSQH,
6384  ALPHA_BUILTIN_MSKBL,
6385  ALPHA_BUILTIN_MSKWL,
6386  ALPHA_BUILTIN_MSKLL,
6387  ALPHA_BUILTIN_MSKQL,
6388  ALPHA_BUILTIN_MSKWH,
6389  ALPHA_BUILTIN_MSKLH,
6390  ALPHA_BUILTIN_MSKQH,
6391  ALPHA_BUILTIN_UMULH,
6392  ALPHA_BUILTIN_ZAP,
6393  ALPHA_BUILTIN_ZAPNOT,
6394  ALPHA_BUILTIN_AMASK,
6395  ALPHA_BUILTIN_IMPLVER,
6396  ALPHA_BUILTIN_RPCC,
6397  ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6398  ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6399
6400  /* TARGET_MAX */
6401  ALPHA_BUILTIN_MINUB8,
6402  ALPHA_BUILTIN_MINSB8,
6403  ALPHA_BUILTIN_MINUW4,
6404  ALPHA_BUILTIN_MINSW4,
6405  ALPHA_BUILTIN_MAXUB8,
6406  ALPHA_BUILTIN_MAXSB8,
6407  ALPHA_BUILTIN_MAXUW4,
6408  ALPHA_BUILTIN_MAXSW4,
6409  ALPHA_BUILTIN_PERR,
6410  ALPHA_BUILTIN_PKLB,
6411  ALPHA_BUILTIN_PKWB,
6412  ALPHA_BUILTIN_UNPKBL,
6413  ALPHA_BUILTIN_UNPKBW,
6414
6415  /* TARGET_CIX */
6416  ALPHA_BUILTIN_CTTZ,
6417  ALPHA_BUILTIN_CTLZ,
6418  ALPHA_BUILTIN_CTPOP,
6419
6420  ALPHA_BUILTIN_max
6421};
6422
6423static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6424  CODE_FOR_builtin_cmpbge,
6425  CODE_FOR_extbl,
6426  CODE_FOR_extwl,
6427  CODE_FOR_extll,
6428  CODE_FOR_extql,
6429  CODE_FOR_extwh,
6430  CODE_FOR_extlh,
6431  CODE_FOR_extqh,
6432  CODE_FOR_builtin_insbl,
6433  CODE_FOR_builtin_inswl,
6434  CODE_FOR_builtin_insll,
6435  CODE_FOR_insql,
6436  CODE_FOR_inswh,
6437  CODE_FOR_inslh,
6438  CODE_FOR_insqh,
6439  CODE_FOR_mskbl,
6440  CODE_FOR_mskwl,
6441  CODE_FOR_mskll,
6442  CODE_FOR_mskql,
6443  CODE_FOR_mskwh,
6444  CODE_FOR_msklh,
6445  CODE_FOR_mskqh,
6446  CODE_FOR_umuldi3_highpart,
6447  CODE_FOR_builtin_zap,
6448  CODE_FOR_builtin_zapnot,
6449  CODE_FOR_builtin_amask,
6450  CODE_FOR_builtin_implver,
6451  CODE_FOR_builtin_rpcc,
6452  CODE_FOR_builtin_establish_vms_condition_handler,
6453  CODE_FOR_builtin_revert_vms_condition_handler,
6454
6455  /* TARGET_MAX */
6456  CODE_FOR_builtin_minub8,
6457  CODE_FOR_builtin_minsb8,
6458  CODE_FOR_builtin_minuw4,
6459  CODE_FOR_builtin_minsw4,
6460  CODE_FOR_builtin_maxub8,
6461  CODE_FOR_builtin_maxsb8,
6462  CODE_FOR_builtin_maxuw4,
6463  CODE_FOR_builtin_maxsw4,
6464  CODE_FOR_builtin_perr,
6465  CODE_FOR_builtin_pklb,
6466  CODE_FOR_builtin_pkwb,
6467  CODE_FOR_builtin_unpkbl,
6468  CODE_FOR_builtin_unpkbw,
6469
6470  /* TARGET_CIX */
6471  CODE_FOR_ctzdi2,
6472  CODE_FOR_clzdi2,
6473  CODE_FOR_popcountdi2
6474};
6475
6476struct alpha_builtin_def
6477{
6478  const char *name;
6479  enum alpha_builtin code;
6480  unsigned int target_mask;
6481  bool is_const;
6482};
6483
6484static struct alpha_builtin_def const zero_arg_builtins[] = {
6485  { "__builtin_alpha_implver",	ALPHA_BUILTIN_IMPLVER,	0, true },
6486  { "__builtin_alpha_rpcc",	ALPHA_BUILTIN_RPCC,	0, false }
6487};
6488
6489static struct alpha_builtin_def const one_arg_builtins[] = {
6490  { "__builtin_alpha_amask",	ALPHA_BUILTIN_AMASK,	0, true },
6491  { "__builtin_alpha_pklb",	ALPHA_BUILTIN_PKLB,	MASK_MAX, true },
6492  { "__builtin_alpha_pkwb",	ALPHA_BUILTIN_PKWB,	MASK_MAX, true },
6493  { "__builtin_alpha_unpkbl",	ALPHA_BUILTIN_UNPKBL,	MASK_MAX, true },
6494  { "__builtin_alpha_unpkbw",	ALPHA_BUILTIN_UNPKBW,	MASK_MAX, true },
6495  { "__builtin_alpha_cttz",	ALPHA_BUILTIN_CTTZ,	MASK_CIX, true },
6496  { "__builtin_alpha_ctlz",	ALPHA_BUILTIN_CTLZ,	MASK_CIX, true },
6497  { "__builtin_alpha_ctpop",	ALPHA_BUILTIN_CTPOP,	MASK_CIX, true }
6498};
6499
6500static struct alpha_builtin_def const two_arg_builtins[] = {
6501  { "__builtin_alpha_cmpbge",	ALPHA_BUILTIN_CMPBGE,	0, true },
6502  { "__builtin_alpha_extbl",	ALPHA_BUILTIN_EXTBL,	0, true },
6503  { "__builtin_alpha_extwl",	ALPHA_BUILTIN_EXTWL,	0, true },
6504  { "__builtin_alpha_extll",	ALPHA_BUILTIN_EXTLL,	0, true },
6505  { "__builtin_alpha_extql",	ALPHA_BUILTIN_EXTQL,	0, true },
6506  { "__builtin_alpha_extwh",	ALPHA_BUILTIN_EXTWH,	0, true },
6507  { "__builtin_alpha_extlh",	ALPHA_BUILTIN_EXTLH,	0, true },
6508  { "__builtin_alpha_extqh",	ALPHA_BUILTIN_EXTQH,	0, true },
6509  { "__builtin_alpha_insbl",	ALPHA_BUILTIN_INSBL,	0, true },
6510  { "__builtin_alpha_inswl",	ALPHA_BUILTIN_INSWL,	0, true },
6511  { "__builtin_alpha_insll",	ALPHA_BUILTIN_INSLL,	0, true },
6512  { "__builtin_alpha_insql",	ALPHA_BUILTIN_INSQL,	0, true },
6513  { "__builtin_alpha_inswh",	ALPHA_BUILTIN_INSWH,	0, true },
6514  { "__builtin_alpha_inslh",	ALPHA_BUILTIN_INSLH,	0, true },
6515  { "__builtin_alpha_insqh",	ALPHA_BUILTIN_INSQH,	0, true },
6516  { "__builtin_alpha_mskbl",	ALPHA_BUILTIN_MSKBL,	0, true },
6517  { "__builtin_alpha_mskwl",	ALPHA_BUILTIN_MSKWL,	0, true },
6518  { "__builtin_alpha_mskll",	ALPHA_BUILTIN_MSKLL,	0, true },
6519  { "__builtin_alpha_mskql",	ALPHA_BUILTIN_MSKQL,	0, true },
6520  { "__builtin_alpha_mskwh",	ALPHA_BUILTIN_MSKWH,	0, true },
6521  { "__builtin_alpha_msklh",	ALPHA_BUILTIN_MSKLH,	0, true },
6522  { "__builtin_alpha_mskqh",	ALPHA_BUILTIN_MSKQH,	0, true },
6523  { "__builtin_alpha_umulh",	ALPHA_BUILTIN_UMULH,	0, true },
6524  { "__builtin_alpha_zap",	ALPHA_BUILTIN_ZAP,	0, true },
6525  { "__builtin_alpha_zapnot",	ALPHA_BUILTIN_ZAPNOT,	0, true },
6526  { "__builtin_alpha_minub8",	ALPHA_BUILTIN_MINUB8,	MASK_MAX, true },
6527  { "__builtin_alpha_minsb8",	ALPHA_BUILTIN_MINSB8,	MASK_MAX, true },
6528  { "__builtin_alpha_minuw4",	ALPHA_BUILTIN_MINUW4,	MASK_MAX, true },
6529  { "__builtin_alpha_minsw4",	ALPHA_BUILTIN_MINSW4,	MASK_MAX, true },
6530  { "__builtin_alpha_maxub8",	ALPHA_BUILTIN_MAXUB8,	MASK_MAX, true },
6531  { "__builtin_alpha_maxsb8",	ALPHA_BUILTIN_MAXSB8,	MASK_MAX, true },
6532  { "__builtin_alpha_maxuw4",	ALPHA_BUILTIN_MAXUW4,	MASK_MAX, true },
6533  { "__builtin_alpha_maxsw4",	ALPHA_BUILTIN_MAXSW4,	MASK_MAX, true },
6534  { "__builtin_alpha_perr",	ALPHA_BUILTIN_PERR,	MASK_MAX, true }
6535};
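
/* As an example (illustrative only), a user can write

	unsigned long low = __builtin_alpha_zapnot (x, 0x0f);

   to keep only the low four bytes of X; when both operands are constant,
   alpha_fold_builtin below evaluates such calls at compile time.  */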
6536
6537static GTY(()) tree alpha_dimode_u;
6538static GTY(()) tree alpha_v8qi_u;
6539static GTY(()) tree alpha_v8qi_s;
6540static GTY(()) tree alpha_v4hi_u;
6541static GTY(()) tree alpha_v4hi_s;
6542
6543static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6544
6545/* Return the alpha builtin for CODE.  */
6546
6547static tree
6548alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6549{
6550  if (code >= ALPHA_BUILTIN_max)
6551    return error_mark_node;
6552  return alpha_builtins[code];
6553}
6554
6555/* Helper function of alpha_init_builtins.  Add the built-in specified
6556   by NAME, TYPE, CODE, and ECF.  */
6557
6558static void
6559alpha_builtin_function (const char *name, tree ftype,
6560			enum alpha_builtin code, unsigned ecf)
6561{
6562  tree decl = add_builtin_function (name, ftype, (int) code,
6563				    BUILT_IN_MD, NULL, NULL_TREE);
6564
6565  if (ecf & ECF_CONST)
6566    TREE_READONLY (decl) = 1;
6567  if (ecf & ECF_NOTHROW)
6568    TREE_NOTHROW (decl) = 1;
6569
6570  alpha_builtins [(int) code] = decl;
6571}
6572
6573/* Helper function of alpha_init_builtins.  Add the COUNT built-in
6574   functions pointed to by P, with function type FTYPE.  */
6575
6576static void
6577alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6578		    tree ftype)
6579{
6580  size_t i;
6581
6582  for (i = 0; i < count; ++i, ++p)
6583    if ((target_flags & p->target_mask) == p->target_mask)
6584      alpha_builtin_function (p->name, ftype, p->code,
6585			      (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6586}
6587
6588static void
6589alpha_init_builtins (void)
6590{
6591  tree ftype;
6592
6593  alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6594  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6595  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6596  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6597  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6598
6599  ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6600  alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6601
6602  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6603  alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6604
6605  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6606				    alpha_dimode_u, NULL_TREE);
6607  alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6608
6609  if (TARGET_ABI_OPEN_VMS)
6610    {
6611      ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6612					NULL_TREE);
6613      alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6614			      ftype,
6615			      ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6616			      0);
6617
6618      ftype = build_function_type_list (ptr_type_node, void_type_node,
6619					NULL_TREE);
6620      alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6621			      ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6622
6623      vms_patch_builtins ();
6624    }
6625}
6626
6627/* Expand an expression EXP that calls a built-in function,
6628   with result going to TARGET if that's convenient
6629   (and in mode MODE if that's convenient).
6630   SUBTARGET may be used as the target for computing one of EXP's operands.
6631   IGNORE is nonzero if the value is to be ignored.  */
6632
6633static rtx
6634alpha_expand_builtin (tree exp, rtx target,
6635		      rtx subtarget ATTRIBUTE_UNUSED,
6636		      machine_mode mode ATTRIBUTE_UNUSED,
6637		      int ignore ATTRIBUTE_UNUSED)
6638{
6639#define MAX_ARGS 2
6640
6641  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6642  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
6643  tree arg;
6644  call_expr_arg_iterator iter;
6645  enum insn_code icode;
6646  rtx op[MAX_ARGS], pat;
6647  int arity;
6648  bool nonvoid;
6649
6650  if (fcode >= ALPHA_BUILTIN_max)
6651    internal_error ("bad builtin fcode");
6652  icode = code_for_builtin[fcode];
6653  if (icode == 0)
6654    internal_error ("bad builtin fcode");
6655
6656  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6657
6658  arity = 0;
6659  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6660    {
6661      const struct insn_operand_data *insn_op;
6662
6663      if (arg == error_mark_node)
6664	return NULL_RTX;
      /* Guard the op[] array; no alpha builtin takes more than MAX_ARGS.  */
      if (arity >= MAX_ARGS)
	return NULL_RTX;
6667
6668      insn_op = &insn_data[icode].operand[arity + nonvoid];
6669
6670      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6671
6672      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6673	op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6674      arity++;
6675    }
6676
6677  if (nonvoid)
6678    {
6679      machine_mode tmode = insn_data[icode].operand[0].mode;
6680      if (!target
6681	  || GET_MODE (target) != tmode
6682	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6683	target = gen_reg_rtx (tmode);
6684    }
6685
6686  switch (arity)
6687    {
6688    case 0:
6689      pat = GEN_FCN (icode) (target);
6690      break;
6691    case 1:
6692      if (nonvoid)
6693        pat = GEN_FCN (icode) (target, op[0]);
6694      else
6695	pat = GEN_FCN (icode) (op[0]);
6696      break;
6697    case 2:
6698      pat = GEN_FCN (icode) (target, op[0], op[1]);
6699      break;
6700    default:
6701      gcc_unreachable ();
6702    }
6703  if (!pat)
6704    return NULL_RTX;
6705  emit_insn (pat);
6706
6707  if (nonvoid)
6708    return target;
6709  else
6710    return const0_rtx;
6711}
6712
6713/* Fold the builtin for the CMPBGE instruction.  This is a vector comparison
6714   with an 8-bit output vector.  OPINT contains the integer operands; bit N
6715   of OP_CONST is set if OPINT[N] is valid.  */
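
/* For instance (illustrative only), __builtin_alpha_cmpbge (x, 0) always
   folds to 0xff here, since every byte compares greater than or equal to
   zero.  */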
6716
6717static tree
6718alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6719{
6720  if (op_const == 3)
6721    {
6722      int i, val;
6723      for (i = 0, val = 0; i < 8; ++i)
6724	{
6725	  unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6726	  unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6727	  if (c0 >= c1)
6728	    val |= 1 << i;
6729	}
6730      return build_int_cst (alpha_dimode_u, val);
6731    }
6732  else if (op_const == 2 && opint[1] == 0)
6733    return build_int_cst (alpha_dimode_u, 0xff);
6734  return NULL;
6735}
6736
6737/* Fold the builtin for the ZAPNOT instruction.  This is essentially a
6738   specialized form of an AND operation.  Other byte manipulation instructions
6739   are defined in terms of this instruction, so this is also used as a
6740   subroutine for other builtins.
6741
6742   OP contains the tree operands; OPINT contains the extracted integer values.
   Bit N of OP_CONST is set if OPINT[N] is valid.  OP may be null if only
   OPINT is to be considered.  */
6745
6746static tree
6747alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6748			   long op_const)
6749{
6750  if (op_const & 2)
6751    {
6752      unsigned HOST_WIDE_INT mask = 0;
6753      int i;
6754
6755      for (i = 0; i < 8; ++i)
6756	if ((opint[1] >> i) & 1)
6757	  mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6758
6759      if (op_const & 1)
6760	return build_int_cst (alpha_dimode_u, opint[0] & mask);
6761
6762      if (op)
6763	return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6764			    build_int_cst (alpha_dimode_u, mask));
6765    }
6766  else if ((op_const & 1) && opint[0] == 0)
6767    return build_int_cst (alpha_dimode_u, 0);
6768  return NULL;
6769}
6770
6771/* Fold the builtins for the EXT family of instructions.  */
6772
6773static tree
6774alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6775			  long op_const, unsigned HOST_WIDE_INT bytemask,
6776			  bool is_high)
6777{
6778  long zap_const = 2;
6779  tree *zap_op = NULL;
6780
6781  if (op_const & 2)
6782    {
6783      unsigned HOST_WIDE_INT loc;
6784
6785      loc = opint[1] & 7;
6786      loc *= BITS_PER_UNIT;
6787
6788      if (loc != 0)
6789	{
6790	  if (op_const & 1)
6791	    {
6792	      unsigned HOST_WIDE_INT temp = opint[0];
6793	      if (is_high)
6794		temp <<= loc;
6795	      else
6796		temp >>= loc;
6797	      opint[0] = temp;
6798	      zap_const = 3;
6799	    }
6800	}
6801      else
6802	zap_op = op;
6803    }
6804
6805  opint[1] = bytemask;
6806  return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6807}
6808
6809/* Fold the builtins for the INS family of instructions.  */
6810
6811static tree
6812alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6813			  long op_const, unsigned HOST_WIDE_INT bytemask,
6814			  bool is_high)
6815{
6816  if ((op_const & 1) && opint[0] == 0)
6817    return build_int_cst (alpha_dimode_u, 0);
6818
6819  if (op_const & 2)
6820    {
6821      unsigned HOST_WIDE_INT temp, loc, byteloc;
6822      tree *zap_op = NULL;
6823
6824      loc = opint[1] & 7;
6825      bytemask <<= loc;
6826
6827      temp = opint[0];
6828      if (is_high)
6829	{
6830	  byteloc = (64 - (loc * 8)) & 0x3f;
6831	  if (byteloc == 0)
6832	    zap_op = op;
6833	  else
6834	    temp >>= byteloc;
6835	  bytemask >>= 8;
6836	}
6837      else
6838	{
6839	  byteloc = loc * 8;
6840	  if (byteloc == 0)
6841	    zap_op = op;
6842	  else
6843	    temp <<= byteloc;
6844	}
6845
6846      opint[0] = temp;
6847      opint[1] = bytemask;
6848      return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6849    }
6850
6851  return NULL;
6852}
6853
6854static tree
6855alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6856			  long op_const, unsigned HOST_WIDE_INT bytemask,
6857			  bool is_high)
6858{
6859  if (op_const & 2)
6860    {
6861      unsigned HOST_WIDE_INT loc;
6862
6863      loc = opint[1] & 7;
6864      bytemask <<= loc;
6865
6866      if (is_high)
6867	bytemask >>= 8;
6868
6869      opint[1] = bytemask ^ 0xff;
6870    }
6871
6872  return alpha_fold_builtin_zapnot (op, opint, op_const);
6873}
6874
6875static tree
6876alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6877{
6878  tree op0 = fold_convert (vtype, op[0]);
6879  tree op1 = fold_convert (vtype, op[1]);
6880  tree val = fold_build2 (code, vtype, op0, op1);
6881  return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6882}
6883
6884static tree
6885alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6886{
6887  unsigned HOST_WIDE_INT temp = 0;
6888  int i;
6889
6890  if (op_const != 3)
6891    return NULL;
6892
6893  for (i = 0; i < 8; ++i)
6894    {
6895      unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6896      unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6897      if (a >= b)
6898	temp += a - b;
6899      else
6900	temp += b - a;
6901    }
6902
6903  return build_int_cst (alpha_dimode_u, temp);
6904}
6905
6906static tree
6907alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6908{
6909  unsigned HOST_WIDE_INT temp;
6910
6911  if (op_const == 0)
6912    return NULL;
6913
6914  temp = opint[0] & 0xff;
6915  temp |= (opint[0] >> 24) & 0xff00;
6916
6917  return build_int_cst (alpha_dimode_u, temp);
6918}
6919
6920static tree
6921alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6922{
6923  unsigned HOST_WIDE_INT temp;
6924
6925  if (op_const == 0)
6926    return NULL;
6927
6928  temp = opint[0] & 0xff;
6929  temp |= (opint[0] >>  8) & 0xff00;
6930  temp |= (opint[0] >> 16) & 0xff0000;
6931  temp |= (opint[0] >> 24) & 0xff000000;
6932
6933  return build_int_cst (alpha_dimode_u, temp);
6934}
6935
6936static tree
6937alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6938{
6939  unsigned HOST_WIDE_INT temp;
6940
6941  if (op_const == 0)
6942    return NULL;
6943
6944  temp = opint[0] & 0xff;
6945  temp |= (opint[0] & 0xff00) << 24;
6946
6947  return build_int_cst (alpha_dimode_u, temp);
6948}
6949
6950static tree
6951alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6952{
6953  unsigned HOST_WIDE_INT temp;
6954
6955  if (op_const == 0)
6956    return NULL;
6957
6958  temp = opint[0] & 0xff;
6959  temp |= (opint[0] & 0x0000ff00) << 8;
6960  temp |= (opint[0] & 0x00ff0000) << 16;
6961  temp |= (opint[0] & 0xff000000) << 24;
6962
6963  return build_int_cst (alpha_dimode_u, temp);
6964}
6965
6966static tree
6967alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6968{
6969  unsigned HOST_WIDE_INT temp;
6970
6971  if (op_const == 0)
6972    return NULL;
6973
6974  if (opint[0] == 0)
6975    temp = 64;
6976  else
6977    temp = exact_log2 (opint[0] & -opint[0]);
6978
6979  return build_int_cst (alpha_dimode_u, temp);
6980}
6981
6982static tree
6983alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6984{
6985  unsigned HOST_WIDE_INT temp;
6986
6987  if (op_const == 0)
6988    return NULL;
6989
6990  if (opint[0] == 0)
6991    temp = 64;
6992  else
6993    temp = 64 - floor_log2 (opint[0]) - 1;
6994
6995  return build_int_cst (alpha_dimode_u, temp);
6996}
6997
6998static tree
6999alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7000{
7001  unsigned HOST_WIDE_INT temp, op;
7002
7003  if (op_const == 0)
7004    return NULL;
7005
7006  op = opint[0];
7007  temp = 0;
7008  while (op)
7009    temp++, op &= op - 1;
7010
7011  return build_int_cst (alpha_dimode_u, temp);
7012}
7013
7014/* Fold one of our builtin functions.  */
7015
7016static tree
7017alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7018		    bool ignore ATTRIBUTE_UNUSED)
7019{
7020  unsigned HOST_WIDE_INT opint[MAX_ARGS];
7021  long op_const = 0;
7022  int i;
7023
7024  if (n_args > MAX_ARGS)
7025    return NULL;
7026
7027  for (i = 0; i < n_args; i++)
7028    {
7029      tree arg = op[i];
7030      if (arg == error_mark_node)
7031	return NULL;
7032
7033      opint[i] = 0;
7034      if (TREE_CODE (arg) == INTEGER_CST)
7035	{
7036          op_const |= 1L << i;
7037	  opint[i] = int_cst_value (arg);
7038	}
7039    }
7040
7041  switch (DECL_MD_FUNCTION_CODE (fndecl))
7042    {
7043    case ALPHA_BUILTIN_CMPBGE:
7044      return alpha_fold_builtin_cmpbge (opint, op_const);
7045
7046    case ALPHA_BUILTIN_EXTBL:
7047      return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7048    case ALPHA_BUILTIN_EXTWL:
7049      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7050    case ALPHA_BUILTIN_EXTLL:
7051      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7052    case ALPHA_BUILTIN_EXTQL:
7053      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7054    case ALPHA_BUILTIN_EXTWH:
7055      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7056    case ALPHA_BUILTIN_EXTLH:
7057      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7058    case ALPHA_BUILTIN_EXTQH:
7059      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7060
7061    case ALPHA_BUILTIN_INSBL:
7062      return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7063    case ALPHA_BUILTIN_INSWL:
7064      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7065    case ALPHA_BUILTIN_INSLL:
7066      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7067    case ALPHA_BUILTIN_INSQL:
7068      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7069    case ALPHA_BUILTIN_INSWH:
7070      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7071    case ALPHA_BUILTIN_INSLH:
7072      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7073    case ALPHA_BUILTIN_INSQH:
7074      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7075
7076    case ALPHA_BUILTIN_MSKBL:
7077      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7078    case ALPHA_BUILTIN_MSKWL:
7079      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7080    case ALPHA_BUILTIN_MSKLL:
7081      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7082    case ALPHA_BUILTIN_MSKQL:
7083      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7084    case ALPHA_BUILTIN_MSKWH:
7085      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7086    case ALPHA_BUILTIN_MSKLH:
7087      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7088    case ALPHA_BUILTIN_MSKQH:
7089      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7090
7091    case ALPHA_BUILTIN_ZAP:
7092      opint[1] ^= 0xff;
7093      /* FALLTHRU */
7094    case ALPHA_BUILTIN_ZAPNOT:
7095      return alpha_fold_builtin_zapnot (op, opint, op_const);
7096
7097    case ALPHA_BUILTIN_MINUB8:
7098      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7099    case ALPHA_BUILTIN_MINSB8:
7100      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7101    case ALPHA_BUILTIN_MINUW4:
7102      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7103    case ALPHA_BUILTIN_MINSW4:
7104      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7105    case ALPHA_BUILTIN_MAXUB8:
7106      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7107    case ALPHA_BUILTIN_MAXSB8:
7108      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7109    case ALPHA_BUILTIN_MAXUW4:
7110      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7111    case ALPHA_BUILTIN_MAXSW4:
7112      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7113
7114    case ALPHA_BUILTIN_PERR:
7115      return alpha_fold_builtin_perr (opint, op_const);
7116    case ALPHA_BUILTIN_PKLB:
7117      return alpha_fold_builtin_pklb (opint, op_const);
7118    case ALPHA_BUILTIN_PKWB:
7119      return alpha_fold_builtin_pkwb (opint, op_const);
7120    case ALPHA_BUILTIN_UNPKBL:
7121      return alpha_fold_builtin_unpkbl (opint, op_const);
7122    case ALPHA_BUILTIN_UNPKBW:
7123      return alpha_fold_builtin_unpkbw (opint, op_const);
7124
7125    case ALPHA_BUILTIN_CTTZ:
7126      return alpha_fold_builtin_cttz (opint, op_const);
7127    case ALPHA_BUILTIN_CTLZ:
7128      return alpha_fold_builtin_ctlz (opint, op_const);
7129    case ALPHA_BUILTIN_CTPOP:
7130      return alpha_fold_builtin_ctpop (opint, op_const);
7131
7132    case ALPHA_BUILTIN_AMASK:
7133    case ALPHA_BUILTIN_IMPLVER:
7134    case ALPHA_BUILTIN_RPCC:
7135      /* None of these are foldable at compile-time.  */
7136    default:
7137      return NULL;
7138    }
7139}
7140
7141bool
7142alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7143{
7144  bool changed = false;
7145  gimple *stmt = gsi_stmt (*gsi);
7146  tree call = gimple_call_fn (stmt);
7147  gimple *new_stmt = NULL;
7148
7149  if (call)
7150    {
7151      tree fndecl = gimple_call_fndecl (stmt);
7152
7153      if (fndecl)
7154	{
7155	  tree arg0, arg1;
7156
7157	  switch (DECL_MD_FUNCTION_CODE (fndecl))
7158	    {
7159	    case ALPHA_BUILTIN_UMULH:
7160	      arg0 = gimple_call_arg (stmt, 0);
7161	      arg1 = gimple_call_arg (stmt, 1);
7162
7163	      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7164					      MULT_HIGHPART_EXPR, arg0, arg1);
7165	      break;
7166	    default:
7167	      break;
7168	    }
7169	}
7170    }
7171
7172  if (new_stmt)
7173    {
7174      gsi_replace (gsi, new_stmt, true);
7175      changed = true;
7176    }
7177
7178  return changed;
7179}
7180
7181/* This page contains routines that are used to determine what the function
7182   prologue and epilogue code will do and write them out.  */
7183
7184/* Compute the size of the save area in the stack.  */
7185
7186/* These variables are used for communication between the following functions.
7187   They indicate various things about the current function being compiled
7188   that are used to tell what kind of prologue, epilogue and procedure
7189   descriptor to generate.  */
7190
7191/* Nonzero if we need a stack procedure.  */
7192enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7193static enum alpha_procedure_types alpha_procedure_type;
7194
7195/* Register number (either FP or SP) that is used to unwind the frame.  */
7196static int vms_unwind_regno;
7197
7198/* Register number used to save FP.  We need not have one for RA since
7199   we don't modify it for register procedures.  This is only defined
7200   for register frame procedures.  */
7201static int vms_save_fp_regno;
7202
7203/* Register number used to reference objects off our PV.  */
7204static int vms_base_regno;
7205
7206/* Compute register masks for saved registers, register save area size,
7207   and total frame size.  */
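
/* A sketch of the computation done below for OSF:

	frame_size = ALPHA_ROUND (outgoing args size)
		     + save area size
		     + ALPHA_ROUND (local frame size + pretend args size)

   where the save area holds one quadword per bit set in sa_mask, padded
   to an even number of quadwords so the frame stays 16-byte aligned.  */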
7208static void
7209alpha_compute_frame_layout (void)
7210{
7211  unsigned HOST_WIDE_INT sa_mask = 0;
7212  HOST_WIDE_INT frame_size;
7213  int sa_size;
7214
7215  /* When outputting a thunk, we don't have valid register life info,
7216     but assemble_start_function wants to output .frame and .mask
7217     directives.  */
7218  if (!cfun->is_thunk)
7219    {
7220      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7221	sa_mask |= HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM;
7222
7223      /* One for every register we have to save.  */
7224      for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7225	if (! call_used_or_fixed_reg_p (i)
7226	    && df_regs_ever_live_p (i) && i != REG_RA)
7227	  sa_mask |= HOST_WIDE_INT_1U << i;
7228
7229      /* We need to restore these for the handler.  */
7230      if (crtl->calls_eh_return)
7231	{
7232	  for (unsigned i = 0; ; ++i)
7233	    {
7234	      unsigned regno = EH_RETURN_DATA_REGNO (i);
7235	      if (regno == INVALID_REGNUM)
7236		break;
7237	      sa_mask |= HOST_WIDE_INT_1U << regno;
7238	    }
7239	}
7240
7241      /* If any register spilled, then spill the return address also.  */
7242      /* ??? This is required by the Digital stack unwind specification
7243	 and isn't needed if we're doing Dwarf2 unwinding.  */
7244      if (sa_mask || alpha_ra_ever_killed ())
7245	sa_mask |= HOST_WIDE_INT_1U << REG_RA;
7246    }
7247
  sa_size = popcount_hwi (sa_mask);
7249  frame_size = get_frame_size ();
7250
7251  if (TARGET_ABI_OPEN_VMS)
7252    {
7253      /* Start with a stack procedure if we make any calls (REG_RA used), or
7254	 need a frame pointer, with a register procedure if we otherwise need
7255	 at least a slot, and with a null procedure in other cases.  */
7256      if ((sa_mask >> REG_RA) & 1 || frame_pointer_needed)
7257	alpha_procedure_type = PT_STACK;
7258      else if (frame_size != 0)
7259	alpha_procedure_type = PT_REGISTER;
7260      else
7261	alpha_procedure_type = PT_NULL;
7262
7263      /* Don't reserve space for saving FP & RA yet.  Do that later after we've
7264	 made the final decision on stack procedure vs register procedure.  */
7265      if (alpha_procedure_type == PT_STACK)
7266	sa_size -= 2;
7267
7268      /* Decide whether to refer to objects off our PV via FP or PV.
7269	 If we need FP for something else or if we receive a nonlocal
7270	 goto (which expects PV to contain the value), we must use PV.
7271	 Otherwise, start by assuming we can use FP.  */
7272
7273      vms_base_regno
7274	= (frame_pointer_needed
7275	   || cfun->has_nonlocal_label
7276	   || alpha_procedure_type == PT_STACK
7277	   || crtl->outgoing_args_size)
7278	  ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7279
7280      /* If we want to copy PV into FP, we need to find some register
7281	 in which to save FP.  */
7282      vms_save_fp_regno = -1;
7283      if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7284	for (unsigned i = 0; i < 32; i++)
7285	  if (! fixed_regs[i] && call_used_or_fixed_reg_p (i)
7286	      && ! df_regs_ever_live_p (i))
7287	    {
7288	      vms_save_fp_regno = i;
7289	      break;
7290	    }
7291
7292      /* A VMS condition handler requires a stack procedure in our
7293	 implementation (not required by the calling standard).  */
7294      if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7295	  || cfun->machine->uses_condition_handler)
7296	vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7297      else if (alpha_procedure_type == PT_NULL)
7298	vms_base_regno = REG_PV;
7299
7300      /* Stack unwinding should be done via FP unless we use it for PV.  */
7301      vms_unwind_regno = (vms_base_regno == REG_PV
7302			  ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7303
7304      /* If this is a stack procedure, allow space for saving FP, RA and
7305	 a condition handler slot if needed.  */
7306      if (alpha_procedure_type == PT_STACK)
7307	sa_size += 2 + cfun->machine->uses_condition_handler;
7308    }
7309  else
7310    {
7311      /* Pad to an even number of 8-byte slots (a multiple of 16 bytes).  */
7312      if (sa_size & 1)
7313	sa_size++;
7314    }
7315  sa_size *= 8;
7316
7317  if (TARGET_ABI_OPEN_VMS)
7318    frame_size = ALPHA_ROUND (sa_size
7319			      + (alpha_procedure_type == PT_STACK ? 8 : 0)
7320			      + frame_size
7321			      + crtl->args.pretend_args_size);
7322  else
7323    frame_size = (ALPHA_ROUND (crtl->outgoing_args_size)
7324		  + sa_size
7325		  + ALPHA_ROUND (frame_size + crtl->args.pretend_args_size));
7326
7327  cfun->machine->sa_mask = sa_mask;
7328  cfun->machine->sa_size = sa_size;
7329  cfun->machine->frame_size = frame_size;
7330}
7331
7332#undef  TARGET_COMPUTE_FRAME_LAYOUT
7333#define TARGET_COMPUTE_FRAME_LAYOUT  alpha_compute_frame_layout
7334
7335/* Return true if this function can directly return via $26.  */
7336
7337bool
7338direct_return (void)
7339{
7340  return (TARGET_ABI_OSF
7341	  && reload_completed
7342	  && cfun->machine->frame_size == 0);
7343}
7344
7345/* Define the offset between two registers, one to be eliminated,
7346   and the other its replacement, at the start of a routine.  */
7347
7348HOST_WIDE_INT
7349alpha_initial_elimination_offset (unsigned int from,
7350				  unsigned int to ATTRIBUTE_UNUSED)
7351{
7352  HOST_WIDE_INT ret;
7353
7354  ret = cfun->machine->sa_size;
7355  ret += ALPHA_ROUND (crtl->outgoing_args_size);
7356
7357  switch (from)
7358    {
7359    case FRAME_POINTER_REGNUM:
7360      break;
7361
7362    case ARG_POINTER_REGNUM:
7363      ret += (ALPHA_ROUND (get_frame_size ()
7364			   + crtl->args.pretend_args_size)
7365	      - crtl->args.pretend_args_size);
7366      break;
7367
7368    default:
7369      gcc_unreachable ();
7370    }
7371
7372  return ret;
7373}
7374
7375#if TARGET_ABI_OPEN_VMS
7376
7377/* Worker function for TARGET_CAN_ELIMINATE.  */
7378
7379static bool
7380alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7381{
7382  switch (alpha_procedure_type)
7383    {
7384    case PT_NULL:
7385      /* NULL procedures have no frame of their own and we only
7386	 know how to resolve from the current stack pointer.  */
7387      return to == STACK_POINTER_REGNUM;
7388
7389    case PT_REGISTER:
7390    case PT_STACK:
7391      /* We always eliminate except to the stack pointer if there is no
7392	 usable frame pointer at hand.  */
7393      return (to != STACK_POINTER_REGNUM
7394	      || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7395    }
7396
7397  gcc_unreachable ();
7398}
7399
7400/* FROM is to be eliminated for TO. Return the offset so that TO+offset
7401   designates the same location as FROM.  */
7402
7403HOST_WIDE_INT
7404alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7405{
7406  /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7407     HARD_FRAME or STACK_PTR.  We need the alpha_procedure_type to decide
7408     on the proper computations and will need the register save area size
7409     in most cases.  */
7410
7411  HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7412
7413  /* PT_NULL procedures have no frame of their own and we only allow
7414     elimination to the stack pointer, which then coincides with the
7415     argument pointer; we resolve the soft frame pointer to it as well.  */
7416
7417  if (alpha_procedure_type == PT_NULL)
7418    return 0;
7419
7420  /* For a PT_STACK procedure the frame layout looks as follows
7421
7422                      -----> decreasing addresses
7423
7424		   <             size rounded up to 16       |   likewise   >
7425     --------------#------------------------------+++--------------+++-------#
7426     incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7427     --------------#---------------------------------------------------------#
7428                                   ^         ^              ^               ^
7429			      ARG_PTR FRAME_PTR HARD_FRAME_PTR       STACK_PTR
7430
7431
7432     PT_REGISTER procedures are similar in that they may have a frame of their
7433     own. They have no regs-sa/pv/outgoing-args area.
7434
7435     We first compute offset to HARD_FRAME_PTR, then add what we need to get
7436     to STACK_PTR if need be.  */
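  /* For instance, with a 24-byte register save area and the 8-byte PV
     slot of a PT_STACK procedure, FRAME_PTR resolves to HARD_FRAME_PTR
     + ALPHA_ROUND (24 + 8) = HARD_FRAME_PTR + 32, ALPHA_ROUND rounding
     up to a multiple of 16.  */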
7437
7438  {
7439    HOST_WIDE_INT offset;
7440    HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7441
7442    switch (from)
7443      {
7444      case FRAME_POINTER_REGNUM:
7445	offset = ALPHA_ROUND (sa_size + pv_save_size);
7446	break;
7447      case ARG_POINTER_REGNUM:
7448	offset = (ALPHA_ROUND (sa_size + pv_save_size
7449			       + get_frame_size ()
7450			       + crtl->args.pretend_args_size)
7451		  - crtl->args.pretend_args_size);
7452	break;
7453      default:
7454	gcc_unreachable ();
7455      }
7456
7457    if (to == STACK_POINTER_REGNUM)
7458      offset += ALPHA_ROUND (crtl->outgoing_args_size);
7459
7460    return offset;
7461  }
7462}
7463
7464#define COMMON_OBJECT "common_object"
7465
7466static tree
7467common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7468		       tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7469		       bool *no_add_attrs ATTRIBUTE_UNUSED)
7470{
7471  tree decl = *node;
7472  gcc_assert (DECL_P (decl));
7473
7474  DECL_COMMON (decl) = 1;
7475  return NULL_TREE;
7476}
7477
7478static const struct attribute_spec vms_attribute_table[] =
7479{
7480  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7481       affects_type_identity, handler, exclude } */
7482  { COMMON_OBJECT,   0, 1, true,  false, false, false, common_object_handler,
7483    NULL },
7484  { NULL,            0, 0, false, false, false, false, NULL, NULL }
7485};
7486
7487void
7488vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
7489			       unsigned HOST_WIDE_INT size,
7490			       unsigned int align)
7491{
7492  tree attr = DECL_ATTRIBUTES (decl);
7493  fprintf (file, "%s", COMMON_ASM_OP);
7494  assemble_name (file, name);
7495  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7496  /* ??? Unlike on OSF/1, the alignment factor is not in log units.  */
7497  fprintf (file, ",%u", align / BITS_PER_UNIT);
7498  if (attr)
7499    {
7500      attr = lookup_attribute (COMMON_OBJECT, attr);
7501      if (attr)
7502        fprintf (file, ",%s",
7503		 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7504    }
7505  fputc ('\n', file);
7506}
7507
7508#undef COMMON_OBJECT
7509
7510#endif
7511
7512bool
7513alpha_find_lo_sum_using_gp (rtx insn)
7514{
7515  subrtx_iterator::array_type array;
7516  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7517    {
7518      const_rtx x = *iter;
7519      if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7520	return true;
7521    }
7522  return false;
7523}
7524
7525static int
7526alpha_does_function_need_gp (void)
7527{
7528  rtx_insn *insn;
7529
7530  /* The GP being variable is an OSF abi thing.  */
7531  if (! TARGET_ABI_OSF)
7532    return 0;
7533
7534  /* We need the gp to load the address of __mcount.  */
7535  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7536    return 1;
7537
7538  /* The code emitted by alpha_output_mi_thunk_osf uses the gp.  */
7539  if (cfun->is_thunk)
7540    return 1;
7541
7542  /* The nonlocal receiver pattern assumes that the gp is valid for
7543     the nested function.  Reasonable because it's almost always set
7544     correctly already.  For the cases where that's wrong, make sure
7545     the nested function loads its gp on entry.  */
7546  if (crtl->has_nonlocal_goto)
7547    return 1;
7548
7549  /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7550     Even if we are a static function, we still need to do this in case
7551     our address is taken and passed to something like qsort.  */
7552
7553  push_topmost_sequence ();
7554  insn = get_insns ();
7555  pop_topmost_sequence ();
7556
7557  for (; insn; insn = NEXT_INSN (insn))
7558    if (NONDEBUG_INSN_P (insn)
7559	&& GET_CODE (PATTERN (insn)) != USE
7560	&& GET_CODE (PATTERN (insn)) != CLOBBER
7561	&& get_attr_usegp (insn))
7562      return 1;
7563
7564  return 0;
7565}
7566
7567/* Helper function for alpha_store_data_bypass_p, handle just a single SET
7568   IN_SET.  */
7569
7570static bool
7571alpha_store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set)
7572{
7573  if (!MEM_P (SET_DEST (in_set)))
7574    return false;
7575
7576  rtx out_set = single_set (out_insn);
7577  if (out_set)
7578    return !reg_mentioned_p (SET_DEST (out_set), SET_DEST (in_set));
7579
7580  rtx out_pat = PATTERN (out_insn);
7581  if (GET_CODE (out_pat) != PARALLEL)
7582    return false;
7583
7584  for (int i = 0; i < XVECLEN (out_pat, 0); i++)
7585    {
7586      rtx out_exp = XVECEXP (out_pat, 0, i);
7587
7588      if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE
7589	  || GET_CODE (out_exp) == TRAP_IF)
7590	continue;
7591
7592      gcc_assert (GET_CODE (out_exp) == SET);
7593
7594      if (reg_mentioned_p (SET_DEST (out_exp), SET_DEST (in_set)))
7595	return false;
7596    }
7597
7598  return true;
7599}
7600
7601/* True if the dependency between OUT_INSN and IN_INSN is on the store
7602   data not the address operand(s) of the store.  IN_INSN and OUT_INSN
7603   must be either a single_set or a PARALLEL with SETs inside.
7604
7605   This alpha-specific version of store_data_bypass_p ignores TRAP_IF
7606   that would result in assertion failure (and internal compiler error)
7607   in the generic store_data_bypass_p function.  */
7608
7609int
7610alpha_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
7611{
7612  rtx in_set = single_set (in_insn);
7613  if (in_set)
7614    return alpha_store_data_bypass_p_1 (out_insn, in_set);
7615
7616  rtx in_pat = PATTERN (in_insn);
7617  if (GET_CODE (in_pat) != PARALLEL)
7618    return false;
7619
7620  for (int i = 0; i < XVECLEN (in_pat, 0); i++)
7621    {
7622      rtx in_exp = XVECEXP (in_pat, 0, i);
7623
7624      if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE
7625	  || GET_CODE (in_exp) == TRAP_IF)
7626	continue;
7627
7628      gcc_assert (GET_CODE (in_exp) == SET);
7629
7630      if (!alpha_store_data_bypass_p_1 (out_insn, in_exp))
7631	return false;
7632    }
7633
7634  return true;
7635}
7636
7637/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7638   sequences.  */
7639
7640static rtx_insn *
7641set_frame_related_p (void)
7642{
7643  rtx_insn *seq = get_insns ();
7644  rtx_insn *insn;
7645
7646  end_sequence ();
7647
7648  if (!seq)
7649    return NULL;
7650
7651  if (INSN_P (seq))
7652    {
7653      insn = seq;
7654      while (insn != NULL_RTX)
7655	{
7656	  RTX_FRAME_RELATED_P (insn) = 1;
7657	  insn = NEXT_INSN (insn);
7658	}
7659      seq = emit_insn (seq);
7660    }
7661  else
7662    {
7663      seq = emit_insn (seq);
7664      RTX_FRAME_RELATED_P (seq) = 1;
7665    }
7666  return seq;
7667}
7668
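/* Wrap an expression that emits insns: collect the sequence started by
   start_sequence and mark every insn in it frame related, so the
   prologue's unwind info covers the whole expansion.  */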
7669#define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
7670
7671/* Generates a store with the proper unwind info attached.  VALUE is
7672   stored at BASE_REG+BASE_OFS.  If FRAME_BIAS is nonzero, then BASE_REG
7673   contains SP+FRAME_BIAS, and that is the unwind info that should be
7674   generated.  If FRAME_REG != VALUE, then VALUE is being stored on
7675   behalf of FRAME_REG, and FRAME_REG should be present in the unwind.  */
7676
7677static void
7678emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7679		    HOST_WIDE_INT base_ofs, rtx frame_reg)
7680{
7681  rtx addr, mem;
7682  rtx_insn *insn;
7683
7684  addr = plus_constant (Pmode, base_reg, base_ofs);
7685  mem = gen_frame_mem (DImode, addr);
7686
7687  insn = emit_move_insn (mem, value);
7688  RTX_FRAME_RELATED_P (insn) = 1;
7689
7690  if (frame_bias || value != frame_reg)
7691    {
7692      if (frame_bias)
7693	{
7694	  addr = plus_constant (Pmode, stack_pointer_rtx,
7695			        frame_bias + base_ofs);
7696	  mem = gen_rtx_MEM (DImode, addr);
7697	}
7698
7699      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7700		    gen_rtx_SET (mem, frame_reg));
7701    }
7702}
7703
7704static void
7705emit_frame_store (unsigned int regno, rtx base_reg,
7706		  HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7707{
7708  rtx reg = gen_rtx_REG (DImode, regno);
7709  emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7710}
7711
7712/* Write function prologue.  */
7713
7714/* On VMS we have two kinds of functions:
7715
7716   - stack frame (PROC_STACK)
7717	these are 'normal' functions with local variables which call
7718	other functions
7719   - register frame (PROC_REGISTER)
7720	keeps all data in registers, needs no stack
7721
7722   We must pass this to the assembler so it can generate the
7723   proper pdsc (procedure descriptor); this is done with the
7724   '.pdesc' directive.
7725
7726   On non-VMS targets we don't really differentiate between the two, as
7727   we can simply allocate stack without saving registers.  */
7728
7729void
7730alpha_expand_prologue (void)
7731{
7732  /* Registers to save.  */
7733  unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
7734  /* Stack space needed for pushing registers clobbered by us.  */
7735  HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7736  /* Complete stack size needed.  */
7737  HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7738  /* Probed stack size; it additionally includes the size of
7739     the "reserve region" if any.  */
7740  HOST_WIDE_INT probed_size, sa_bias;
7741  /* Offset from base reg to register save area.  */
7742  HOST_WIDE_INT reg_offset;
7743  rtx sa_reg;
7744
7745  if (flag_stack_usage_info)
7746    current_function_static_stack_size = frame_size;
7747
7748  if (TARGET_ABI_OPEN_VMS)
7749    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7750  else
7751    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7752
7753  /* Emit an insn to reload GP, if needed.  */
7754  if (TARGET_ABI_OSF)
7755    {
7756      alpha_function_needs_gp = alpha_does_function_need_gp ();
7757      if (alpha_function_needs_gp)
7758	emit_insn (gen_prologue_ldgp ());
7759    }
7760
7761  /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7762     the call to mcount ourselves, rather than having the linker do it
7763     magically in response to -pg.  Since _mcount has special linkage,
7764     don't represent the call as a call.  */
7765  if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7766    emit_insn (gen_prologue_mcount ());
7767
7768  /* Adjust the stack by the frame size.  If the frame size is > 4096
7769     bytes, we need to be sure we probe somewhere in the first and last
7770     4096 bytes (we can probably get away without the latter test) and
7771     every 8192 bytes in between.  If the frame size is > 32768, we
7772     do this in a loop.  Otherwise, we generate the explicit probe
7773     instructions.
7774
7775     Note that we are only allowed to adjust sp once in the prologue.  */
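  /* For example, a 20000-byte frame with no stack-check padding gets
     probes at SP-4096 and SP-12288 from the code below, plus a final
     probe at SP-20000 when no registers are being saved.  */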
7776
7777  probed_size = frame_size;
7778  if (flag_stack_check || flag_stack_clash_protection)
7779    probed_size += get_stack_check_protect ();
7780
7781  if (probed_size <= 32768)
7782    {
7783      if (probed_size > 4096)
7784	{
7785	  int probed;
7786
7787	  for (probed = 4096; probed < probed_size; probed += 8192)
7788	    emit_insn (gen_stack_probe_internal (GEN_INT (-probed)));
7789
7790	  /* We only have to do this probe if we aren't saving registers or
7791	     if we are probing beyond the frame because of -fstack-check.  */
7792	  if ((sa_size == 0 && probed_size > probed - 4096)
7793	      || flag_stack_check || flag_stack_clash_protection)
7794	    emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size)));
7795	}
7796
7797      if (frame_size != 0)
7798	FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7799				    GEN_INT (-frame_size))));
7800    }
7801  else
7802    {
7803      /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7804	 number of 8192 byte blocks to probe.  We then probe each block
7805	 in the loop and then set SP to the proper location.  If the
7806	 amount remaining is > 4096, we have to do one more probe if we
7807	 are not saving any registers or if we are probing beyond the
7808	 frame because of -fstack-check.  */
7809
7810      HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7811      HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7812      rtx ptr = gen_rtx_REG (DImode, 22);
7813      rtx count = gen_rtx_REG (DImode, 23);
7814      rtx seq;
7815
7816      emit_move_insn (count, GEN_INT (blocks));
7817      emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7818
7819      /* Because of the difficulty in emitting a new basic block this
7820	 late in the compilation, generate the loop as a single insn.  */
7821      emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7822
7823      if ((leftover > 4096 && sa_size == 0)
7824	  || flag_stack_check || flag_stack_clash_protection)
7825	{
7826	  rtx last = gen_rtx_MEM (DImode,
7827				  plus_constant (Pmode, ptr, -leftover));
7828	  MEM_VOLATILE_P (last) = 1;
7829	  emit_move_insn (last, const0_rtx);
7830	}
7831
7832      if (flag_stack_check || flag_stack_clash_protection)
7833	{
7834	  /* If -fstack-check is specified we have to load the entire
7835	     constant into a register and subtract from the sp in one go,
7836	     because the probed stack size is not equal to the frame size.  */
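	  /* Split FRAME_SIZE into a sign-extended 16-bit low part that a
	     single lda can add and the remaining high part loaded first.  */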
7837	  HOST_WIDE_INT lo, hi;
7838	  lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7839	  hi = frame_size - lo;
7840
7841	  emit_move_insn (ptr, GEN_INT (hi));
7842	  emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7843	  seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7844				       ptr));
7845	}
7846      else
7847	{
7848	  seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7849				       GEN_INT (-leftover)));
7850	}
7851
7852      /* This alternative is special, because the DWARF code cannot
7853         possibly intuit through the loop above.  So we invent this
7854         note for it to look at instead.  */
7855      RTX_FRAME_RELATED_P (seq) = 1;
7856      add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7857		    gen_rtx_SET (stack_pointer_rtx,
7858				 plus_constant (Pmode, stack_pointer_rtx,
7859						-frame_size)));
7860    }
7861
7862  /* Cope with very large offsets to the register save area.  */
7863  sa_bias = 0;
7864  sa_reg = stack_pointer_rtx;
7865  if (reg_offset + sa_size > 0x8000)
7866    {
7867      int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7868      rtx sa_bias_rtx;
7869
7870      if (low + sa_size <= 0x8000)
7871	sa_bias = reg_offset - low, reg_offset = low;
7872      else
7873	sa_bias = reg_offset, reg_offset = 0;
7874
7875      sa_reg = gen_rtx_REG (DImode, 24);
7876      sa_bias_rtx = GEN_INT (sa_bias);
7877
7878      if (add_operand (sa_bias_rtx, DImode))
7879	emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7880      else
7881	{
7882	  emit_move_insn (sa_reg, sa_bias_rtx);
7883	  emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7884	}
7885    }
7886
7887  /* Save regs in stack order.  Beginning with VMS PV.  */
7888  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7889    emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7890
7891  /* Save register RA next, followed by any other registers
7892     that need to be saved.  */
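  /* SA_MASK is nonzero only when REG_RA is in it, so the first pass of
     the loop stores RA; ctz_hwi then yields the remaining registers in
     increasing order.  */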
7893  for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask))
7894    {
7895      emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7896      reg_offset += 8;
7897      sa_mask &= ~(HOST_WIDE_INT_1U << i);
7898    }
7899
7900  if (TARGET_ABI_OPEN_VMS)
7901    {
7902      /* Register frame procedures save the fp.  */
7903      if (alpha_procedure_type == PT_REGISTER)
7904	{
7905	  rtx_insn *insn =
7906	    emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7907			    hard_frame_pointer_rtx);
7908	  add_reg_note (insn, REG_CFA_REGISTER, NULL);
7909	  RTX_FRAME_RELATED_P (insn) = 1;
7910	}
7911
7912      if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7913	emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7914				    gen_rtx_REG (DImode, REG_PV)));
7915
7916      if (alpha_procedure_type != PT_NULL
7917	  && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7918	FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7919
7920      /* If we have to allocate space for outgoing args, do it now.  */
7921      if (crtl->outgoing_args_size != 0)
7922	{
7923	  rtx_insn *seq
7924	    = emit_move_insn (stack_pointer_rtx,
7925			      plus_constant
7926			      (Pmode, hard_frame_pointer_rtx,
7927			       - (ALPHA_ROUND
7928				  (crtl->outgoing_args_size))));
7929
7930	  /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7931	     if ! frame_pointer_needed. Setting the bit will change the CFA
7932	     computation rule to use sp again, which would be wrong if we had
7933	     frame_pointer_needed, as this means sp might move unpredictably
7934	     later on.
7935
7936	     Also, note that
7937	       frame_pointer_needed
7938	       => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7939	     and
7940	       crtl->outgoing_args_size != 0
7941	       => alpha_procedure_type != PT_NULL,
7942
7943	     so when we are not setting the bit here, we are guaranteed to
7944	     have emitted an FRP frame pointer update just before.  */
7945	  RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7946	}
7947    }
7948  else
7949    {
7950      /* If we need a frame pointer, set it from the stack pointer.  */
7951      if (frame_pointer_needed)
7952	{
7953	  if (TARGET_CAN_FAULT_IN_PROLOGUE)
7954	    FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7955	  else
7956	    /* This must always be the last instruction in the
7957	       prologue, thus we emit a special move + clobber.  */
7958	      FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7959				           stack_pointer_rtx, sa_reg)));
7960	}
7961    }
7962
7963  /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7964     the prologue, for exception handling reasons, we cannot do this for
7965     any insn that might fault.  We could prevent this for mems with a
7966     (clobber:BLK (scratch)), but this doesn't work for fp insns.  So we
7967     have to prevent all such scheduling with a blockage.
7968
7969     Linux, on the other hand, never bothered to implement OSF/1's
7970     exception handling, and so doesn't care about such things.  Anyone
7971     planning to use dwarf2 frame-unwind info can also omit the blockage.  */
7972
7973  if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7974    emit_insn (gen_blockage ());
7975}
7976
7977/* Count the number of .file directives, so that .loc is up to date.  */
7978int num_source_filenames = 0;
7979
7980/* Output the textual info surrounding the prologue.  */
7981
7982void
7983alpha_start_function (FILE *file, const char *fnname,
7984		      tree decl ATTRIBUTE_UNUSED)
7985{
7986  unsigned long imask, fmask;
7987  /* Complete stack size needed.  */
7988  HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7989  /* The maximum debuggable frame size.  */
7990  const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31;
7991  /* Offset from base reg to register save area.  */
7992  HOST_WIDE_INT reg_offset;
7993  char *entry_label = (char *) alloca (strlen (fnname) + 6);
7994  char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7995  int i;
7996
7997#if TARGET_ABI_OPEN_VMS
7998  vms_start_function (fnname);
7999#endif
8000
8001  alpha_fnname = fnname;
8002
8003  if (TARGET_ABI_OPEN_VMS)
8004    reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8005  else
8006    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8007
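  /* The low half of SA_MASK covers the integer registers and the high
     half the FP registers; these feed the .mask and .fmask directives
     emitted below.  */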
8008  imask = cfun->machine->sa_mask & 0xffffffffu;
8009  fmask = cfun->machine->sa_mask >> 32;
8010
8011  /* Issue function start and label.  */
8012  if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8013    {
8014      fputs ("\t.ent ", file);
8015      assemble_name (file, fnname);
8016      putc ('\n', file);
8017
8018      /* If the function needs GP, we'll write the "..ng" label there.
8019	 Otherwise, do it here.  */
8020      if (TARGET_ABI_OSF
8021          && ! alpha_function_needs_gp
8022	  && ! cfun->is_thunk)
8023	{
8024	  putc ('$', file);
8025	  assemble_name (file, fnname);
8026	  fputs ("..ng:\n", file);
8027	}
8028    }
8029  /* Nested functions on VMS that are potentially called via trampoline
8030     get a special transfer entry point that loads the called function's
8031     procedure descriptor and static chain.  */
8032   if (TARGET_ABI_OPEN_VMS
8033       && !TREE_PUBLIC (decl)
8034       && DECL_CONTEXT (decl)
8035       && !TYPE_P (DECL_CONTEXT (decl))
8036       && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8037     {
8038	strcpy (tramp_label, fnname);
8039	strcat (tramp_label, "..tr");
8040	ASM_OUTPUT_LABEL (file, tramp_label);
8041	fprintf (file, "\tldq $1,24($27)\n");
8042	fprintf (file, "\tldq $27,16($27)\n");
8043     }
8044
8045  strcpy (entry_label, fnname);
8046  if (TARGET_ABI_OPEN_VMS)
8047    strcat (entry_label, "..en");
8048
8049  ASM_OUTPUT_LABEL (file, entry_label);
8050  inside_function = TRUE;
8051
8052  if (TARGET_ABI_OPEN_VMS)
8053    fprintf (file, "\t.base $%d\n", vms_base_regno);
8054
8055  if (TARGET_ABI_OSF
8056      && TARGET_IEEE_CONFORMANT
8057      && !flag_inhibit_size_directive)
8058    {
8059      /* Set flags in procedure descriptor to request IEEE-conformant
8060	 math-library routines.  The value we set it to is PDSC_EXC_IEEE
8061	 (/usr/include/pdsc.h).  */
8062      fputs ("\t.eflag 48\n", file);
8063    }
8064
8065  /* Set up offsets to alpha virtual arg/local debugging pointer.  */
8066  alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8067  alpha_arg_offset = -frame_size + 48;
8068
8069  /* Describe our frame.  If the frame size is larger than an integer,
8070     print it as zero to avoid an assembler error.  We won't be
8071     properly describing such a frame, but that's the best we can do.  */
8072  if (TARGET_ABI_OPEN_VMS)
8073    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8074	     HOST_WIDE_INT_PRINT_DEC "\n",
8075	     vms_unwind_regno,
8076	     frame_size >= max_frame_size ? 0 : frame_size,
8077	     reg_offset);
8078  else if (!flag_inhibit_size_directive)
8079    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8080	     (frame_pointer_needed
8081	      ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8082	     frame_size >= max_frame_size ? 0 : frame_size,
8083	     crtl->args.pretend_args_size);
8084
8085  /* Describe which registers were spilled.  */
8086  if (TARGET_ABI_OPEN_VMS)
8087    {
8088      if (imask)
8089        /* ??? Does VMS care if mask contains ra?  The old code didn't
8090           set it, so I don't here.  */
8091	fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8092      if (fmask)
8093	fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8094      if (alpha_procedure_type == PT_REGISTER)
8095	fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8096    }
8097  else if (!flag_inhibit_size_directive)
8098    {
8099      if (imask)
8100	{
8101	  fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8102		   frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8103
8104	  for (i = 0; i < 32; ++i)
8105	    if (imask & (1UL << i))
8106	      reg_offset += 8;
8107	}
8108
8109      if (fmask)
8110	fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8111		 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8112    }
8113
8114#if TARGET_ABI_OPEN_VMS
8115  /* If a user condition handler has been installed at some point, emit
8116     the procedure descriptor bits to point the Condition Handling Facility
8117     at the indirection wrapper, and state the fp offset at which the user
8118     handler may be found.  */
8119  if (cfun->machine->uses_condition_handler)
8120    {
8121      fprintf (file, "\t.handler __gcc_shell_handler\n");
8122      fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8123    }
8124
8125#ifdef TARGET_VMS_CRASH_DEBUG
8126  /* Support of minimal traceback info.  */
8127  switch_to_section (readonly_data_section);
8128  fprintf (file, "\t.align 3\n");
8129  assemble_name (file, fnname); fputs ("..na:\n", file);
8130  fputs ("\t.ascii \"", file);
8131  assemble_name (file, fnname);
8132  fputs ("\\0\"\n", file);
8133  switch_to_section (text_section);
8134#endif
8135#endif /* TARGET_ABI_OPEN_VMS */
8136}
8137
8138/* Emit the .prologue note at the scheduled end of the prologue.  */
8139
8140static void
8141alpha_output_function_end_prologue (FILE *file)
8142{
8143  if (TARGET_ABI_OPEN_VMS)
8144    fputs ("\t.prologue\n", file);
8145  else if (!flag_inhibit_size_directive)
8146    fprintf (file, "\t.prologue %d\n",
8147	     alpha_function_needs_gp || cfun->is_thunk);
8148}
8149
8150/* Write function epilogue.  */
8151
8152void
8153alpha_expand_epilogue (void)
8154{
8155  /* Registers to save.  */
8156  unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
8157  /* Stack space needed for pushing registers clobbered by us.  */
8158  HOST_WIDE_INT sa_size = cfun->machine->sa_size;
8159  /* Complete stack size needed.  */
8160  HOST_WIDE_INT frame_size = cfun->machine->frame_size;
8161  /* Offset from base reg to register save area.  */
8162  HOST_WIDE_INT reg_offset;
8163  int fp_is_frame_pointer, fp_offset;
8164  rtx sa_reg, sa_reg_exp = NULL;
8165  rtx sp_adj1, sp_adj2, mem, reg, insn;
8166  rtx eh_ofs;
8167  rtx cfa_restores = NULL_RTX;
8168
8169  if (TARGET_ABI_OPEN_VMS)
8170    {
8171       if (alpha_procedure_type == PT_STACK)
8172          reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8173       else
8174          reg_offset = 0;
8175    }
8176  else
8177    reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8178
8179  fp_is_frame_pointer
8180    = (TARGET_ABI_OPEN_VMS
8181       ? alpha_procedure_type == PT_STACK
8182       : frame_pointer_needed);
8183  fp_offset = 0;
8184  sa_reg = stack_pointer_rtx;
8185
8186  if (crtl->calls_eh_return)
8187    eh_ofs = EH_RETURN_STACKADJ_RTX;
8188  else
8189    eh_ofs = NULL_RTX;
8190
8191  if (sa_size)
8192    {
8193      /* If we have a frame pointer, restore SP from it.  */
8194      if (TARGET_ABI_OPEN_VMS
8195	  ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8196	  : frame_pointer_needed)
8197	emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8198
8199      /* Cope with very large offsets to the register save area.  */
8200      if (reg_offset + sa_size > 0x8000)
8201	{
8202	  int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8203	  HOST_WIDE_INT bias;
8204
8205	  if (low + sa_size <= 0x8000)
8206	    bias = reg_offset - low, reg_offset = low;
8207	  else
8208	    bias = reg_offset, reg_offset = 0;
8209
8210	  sa_reg = gen_rtx_REG (DImode, 22);
8211	  sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8212
8213	  emit_move_insn (sa_reg, sa_reg_exp);
8214	}
8215
8216      /* Restore registers in order, excepting a true frame pointer.  */
8217      for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask))
8218	{
8219	  if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8220	    fp_offset = reg_offset;
8221	  else
8222	    {
8223	      mem = gen_frame_mem (DImode,
8224				   plus_constant (Pmode, sa_reg,
8225						  reg_offset));
8226	      reg = gen_rtx_REG (DImode, i);
8227	      emit_move_insn (reg, mem);
8228	      cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8229					     cfa_restores);
8230	    }
8231	  reg_offset += 8;
8232	  sa_mask &= ~(HOST_WIDE_INT_1U << i);
8233	}
8234    }
8235
8236  if (frame_size || eh_ofs)
8237    {
8238      sp_adj1 = stack_pointer_rtx;
8239
8240      if (eh_ofs)
8241	{
8242	  sp_adj1 = gen_rtx_REG (DImode, 23);
8243	  emit_move_insn (sp_adj1,
8244			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8245	}
8246
8247      /* If the stack size is large, begin computation into a temporary
8248	 register so as not to interfere with a potential fp restore,
8249	 which must be consecutive with an SP restore.  */
8250      if (frame_size < 32768 && !cfun->calls_alloca)
8251	sp_adj2 = GEN_INT (frame_size);
8252      else if (frame_size < 0x40007fffL)
8253	{
8254	  int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8255
8256	  sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8257	  if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8258	    sp_adj1 = sa_reg;
8259	  else
8260	    {
8261	      sp_adj1 = gen_rtx_REG (DImode, 23);
8262	      emit_move_insn (sp_adj1, sp_adj2);
8263	    }
8264	  sp_adj2 = GEN_INT (low);
8265	}
8266      else
8267	{
8268	  rtx tmp = gen_rtx_REG (DImode, 23);
8269	  sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8270	  if (!sp_adj2)
8271	    {
8272	      /* We can't drop new things to memory this late, afaik,
8273		 so build it up by pieces.  */
8274	      sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8275	      gcc_assert (sp_adj2);
8276	    }
8277	}
8278
8279      /* From now on, things must be in order.  So emit blockages.  */
8280
8281      /* Restore the frame pointer.  */
8282      if (fp_is_frame_pointer)
8283	{
8284	  emit_insn (gen_blockage ());
8285	  mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8286						      fp_offset));
8287	  emit_move_insn (hard_frame_pointer_rtx, mem);
8288	  cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8289					 hard_frame_pointer_rtx, cfa_restores);
8290	}
8291      else if (TARGET_ABI_OPEN_VMS)
8292	{
8293	  emit_insn (gen_blockage ());
8294	  emit_move_insn (hard_frame_pointer_rtx,
8295			  gen_rtx_REG (DImode, vms_save_fp_regno));
8296	  cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8297					 hard_frame_pointer_rtx, cfa_restores);
8298	}
8299
8300      /* Restore the stack pointer.  */
8301      emit_insn (gen_blockage ());
8302      if (sp_adj2 == const0_rtx)
8303	insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8304      else
8305	insn = emit_move_insn (stack_pointer_rtx,
8306			       gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8307      REG_NOTES (insn) = cfa_restores;
8308      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8309      RTX_FRAME_RELATED_P (insn) = 1;
8310    }
8311  else
8312    {
8313      gcc_assert (cfa_restores == NULL);
8314
8315      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8316        {
8317          emit_insn (gen_blockage ());
8318          insn = emit_move_insn (hard_frame_pointer_rtx,
8319				 gen_rtx_REG (DImode, vms_save_fp_regno));
8320	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8321	  RTX_FRAME_RELATED_P (insn) = 1;
8322        }
8323    }
8324}
8325
8326/* Output the rest of the textual info surrounding the epilogue.  */
8327
8328void
8329alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8330{
8331  rtx_insn *insn;
8332
8333  /* We output a nop after noreturn calls at the very end of the function to
8334     ensure that the return address always remains in the caller's code range,
8335     as not doing so might confuse unwinding engines.  */
8336  insn = get_last_insn ();
8337  if (!INSN_P (insn))
8338    insn = prev_active_insn (insn);
8339  if (insn && CALL_P (insn))
8340    output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8341
8342#if TARGET_ABI_OPEN_VMS
8343  /* Write the linkage entries.  */
8344  alpha_write_linkage (file, fnname);
8345#endif
8346
8347  /* End the function.  */
8348  if (TARGET_ABI_OPEN_VMS
8349      || !flag_inhibit_size_directive)
8350    {
8351      fputs ("\t.end ", file);
8352      assemble_name (file, fnname);
8353      putc ('\n', file);
8354    }
8355  inside_function = FALSE;
8356}
8357
8358#if TARGET_ABI_OSF
8359/* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8360
8361   In order to avoid the hordes of differences between generated code
8362   with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8363   lots of code loading up large constants, generate rtl and emit it
8364   instead of going straight to text.
8365
8366   Not sure why this idea hasn't been explored before...  */
8367
8368static void
8369alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8370			   HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8371			   tree function)
8372{
8373  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8374  HOST_WIDE_INT hi, lo;
8375  rtx this_rtx, funexp;
8376  rtx_insn *insn;
8377
8378  /* We always require a valid GP.  */
8379  emit_insn (gen_prologue_ldgp ());
8380  emit_note (NOTE_INSN_PROLOGUE_END);
8381
8382  /* Find the "this" pointer.  If the function returns a structure,
8383     the structure return pointer is in $16.  */
8384  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8385    this_rtx = gen_rtx_REG (Pmode, 17);
8386  else
8387    this_rtx = gen_rtx_REG (Pmode, 16);
8388
8389  /* Add DELTA.  When possible we use ldah+lda.  Otherwise load the
8390     entire constant for the add.  */
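  /* For example, DELTA == 0x1234ffff splits into LO == -1 and
     HI == 0x12350000, and HI + LO reproduces DELTA exactly.  */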
8391  lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8392  hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8393  if (hi + lo == delta)
8394    {
8395      if (hi)
8396	emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8397      if (lo)
8398	emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8399    }
8400  else
8401    {
8402      rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8403      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8404    }
8405
8406  /* Add a delta stored in the vtable at VCALL_OFFSET.  */
8407  if (vcall_offset)
8408    {
8409      rtx tmp, tmp2;
8410
8411      tmp = gen_rtx_REG (Pmode, 0);
8412      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8413
8414      lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8415      hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8416      if (hi + lo == vcall_offset)
8417	{
8418	  if (hi)
8419	    emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8420	}
8421      else
8422	{
8423	  tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8424					    vcall_offset);
8425          emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8426	  lo = 0;
8427	}
8428      if (lo)
8429	tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8430      else
8431	tmp2 = tmp;
8432      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8433
8434      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8435    }
8436
8437  /* Generate a tail call to the target function.  */
8438  if (! TREE_USED (function))
8439    {
8440      assemble_external (function);
8441      TREE_USED (function) = 1;
8442    }
8443  funexp = XEXP (DECL_RTL (function), 0);
8444  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8445  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8446  SIBLING_CALL_P (insn) = 1;
8447
8448  /* Run just enough of rest_of_compilation to get the insns emitted.
8449     There's not really enough bulk here to make other passes such as
8450     instruction scheduling worthwhile.  */
8451  insn = get_insns ();
8452  shorten_branches (insn);
8453  assemble_start_function (thunk_fndecl, fnname);
8454  final_start_function (insn, file, 1);
8455  final (insn, file, 1);
8456  final_end_function ();
8457  assemble_end_function (thunk_fndecl, fnname);
8458}
8459#endif /* TARGET_ABI_OSF */
8460
8461/* Debugging support.  */
8462
8463#include "gstab.h"
8464
8465/* Name of the file containing the current function.  */
8466
8467static const char *current_function_file = "";
8468
8469/* Offsets to alpha virtual arg/local debugging pointers.  */
8470
8471long alpha_arg_offset;
8472long alpha_auto_offset;
8473
8474/* Emit a new filename to a stream.  */
8475
8476void
8477alpha_output_filename (FILE *stream, const char *name)
8478{
8479  static int first_time = TRUE;
8480
8481  if (first_time)
8482    {
8483      first_time = FALSE;
8484      ++num_source_filenames;
8485      current_function_file = name;
8486      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8487      output_quoted_string (stream, name);
8488      fprintf (stream, "\n");
8489    }
8490
8491  else if (name != current_function_file
8492	   && strcmp (name, current_function_file) != 0)
8493    {
8494      ++num_source_filenames;
8495      current_function_file = name;
8496      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8497
8498      output_quoted_string (stream, name);
8499      fprintf (stream, "\n");
8500    }
8501}
8502
8503/* Structure to show the current status of registers and memory.  */
8504
8505struct shadow_summary
8506{
8507  struct {
8508    unsigned int i     : 31;	/* Mask of int regs */
8509    unsigned int fp    : 31;	/* Mask of fp regs */
8510    unsigned int mem   :  1;	/* mem == imem | fpmem */
8511  } used, defd;
8512};
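/* Registers $31 and $f31 always read as zero and are skipped below, so
   31 bits per bank are enough for these masks.  */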
8513
8514/* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8515   to the summary structure.  SET is nonzero if the insn is setting the
8516   object, otherwise zero.  */
8517
8518static void
8519summarize_insn (rtx x, struct shadow_summary *sum, int set)
8520{
8521  const char *format_ptr;
8522  int i, j;
8523
8524  if (x == 0)
8525    return;
8526
8527  switch (GET_CODE (x))
8528    {
8529      /* ??? Note that this case would be incorrect if the Alpha had a
8530	 ZERO_EXTRACT in SET_DEST.  */
8531    case SET:
8532      summarize_insn (SET_SRC (x), sum, 0);
8533      summarize_insn (SET_DEST (x), sum, 1);
8534      break;
8535
8536    case CLOBBER:
8537      summarize_insn (XEXP (x, 0), sum, 1);
8538      break;
8539
8540    case USE:
8541      summarize_insn (XEXP (x, 0), sum, 0);
8542      break;
8543
8544    case ASM_OPERANDS:
8545      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8546	summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8547      break;
8548
8549    case PARALLEL:
8550      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8551	summarize_insn (XVECEXP (x, 0, i), sum, 0);
8552      break;
8553
8554    case SUBREG:
8555      summarize_insn (SUBREG_REG (x), sum, 0);
8556      break;
8557
8558    case REG:
8559      {
8560	int regno = REGNO (x);
8561	unsigned long mask = ((unsigned long) 1) << (regno % 32);
8562
8563	if (regno == 31 || regno == 63)
8564	  break;
8565
8566	if (set)
8567	  {
8568	    if (regno < 32)
8569	      sum->defd.i |= mask;
8570	    else
8571	      sum->defd.fp |= mask;
8572	  }
8573	else
8574	  {
8575	    if (regno < 32)
8576	      sum->used.i  |= mask;
8577	    else
8578	      sum->used.fp |= mask;
8579	  }
8580	}
8581      break;
8582
8583    case MEM:
8584      if (set)
8585	sum->defd.mem = 1;
8586      else
8587	sum->used.mem = 1;
8588
8589      /* Find the regs used in memory address computation: */
8590      summarize_insn (XEXP (x, 0), sum, 0);
8591      break;
8592
8593    case CONST_INT:   case CONST_WIDE_INT:  case CONST_DOUBLE:
8594    case SYMBOL_REF:  case LABEL_REF:       case CONST:
8595    case SCRATCH:     case ASM_INPUT:
8596      break;
8597
8598      /* Handle common unary and binary ops for efficiency.  */
8599    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
8600    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
8601    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
8602    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
8603    case NE:       case EQ:      case GE:      case GT:        case LE:
8604    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
8605      summarize_insn (XEXP (x, 0), sum, 0);
8606      summarize_insn (XEXP (x, 1), sum, 0);
8607      break;
8608
8609    case NEG:  case NOT:  case SIGN_EXTEND:  case ZERO_EXTEND:
8610    case TRUNCATE:  case FLOAT_EXTEND:  case FLOAT_TRUNCATE:  case FLOAT:
8611    case FIX:  case UNSIGNED_FLOAT:  case UNSIGNED_FIX:  case ABS:
8612    case SQRT:  case FFS:
8613      summarize_insn (XEXP (x, 0), sum, 0);
8614      break;
8615
8616    default:
8617      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8618      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8619	switch (format_ptr[i])
8620	  {
8621	  case 'e':
8622	    summarize_insn (XEXP (x, i), sum, 0);
8623	    break;
8624
8625	  case 'E':
8626	    for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8627	      summarize_insn (XVECEXP (x, i, j), sum, 0);
8628	    break;
8629
8630	  case 'i':
8631	    break;
8632
8633	  default:
8634	    gcc_unreachable ();
8635	  }
8636    }
8637}
8638
8639/* Ensure a sufficient number of `trapb' insns are in the code when
8640   the user requests code with a trap precision of functions or
8641   instructions.
8642
8643   In naive mode, when the user requests a trap-precision of
8644   "instruction", a trapb is needed after every instruction that may
8645   generate a trap.  This ensures that the code is resumption safe but
8646   it is also slow.
8647
8648   When optimizations are turned on, we delay issuing a trapb as long
8649   as possible.  In this context, a trap shadow is the sequence of
8650   instructions that starts with a (potentially) trap generating
8651   instruction and extends to the next trapb or call_pal instruction
8652   (but GCC never generates call_pal by itself).  We can delay (and
8653   therefore sometimes omit) a trapb subject to the following
8654   conditions:
8655
8656   (a) On entry to the trap shadow, if any Alpha register or memory
8657   location contains a value that is used as an operand value by some
8658   instruction in the trap shadow (live on entry), then no instruction
8659   in the trap shadow may modify the register or memory location.
8660
8661   (b) Within the trap shadow, the computation of the base register
8662   for a memory load or store instruction may not involve using the
8663   result of an instruction that might generate an UNPREDICTABLE
8664   result.
8665
8666   (c) Within the trap shadow, no register may be used more than once
8667   as a destination register.  (This is to make life easier for the
8668   trap-handler.)
8669
8670   (d) The trap shadow may not include any branch instructions.  */
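/* In the pass below, SHADOW accumulates everything used or defined since
   the pending trap: condition (c) corresponds to the defd/defd overlap
   test and conditions (a) and (b) to the defd/used overlap test.  */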
8671
8672static void
8673alpha_handle_trap_shadows (void)
8674{
8675  struct shadow_summary shadow;
8676  int trap_pending, exception_nesting;
8677  rtx_insn *i, *n;
8678
8679  trap_pending = 0;
8680  exception_nesting = 0;
8681  shadow.used.i = 0;
8682  shadow.used.fp = 0;
8683  shadow.used.mem = 0;
8684  shadow.defd = shadow.used;
8685
8686  for (i = get_insns (); i ; i = NEXT_INSN (i))
8687    {
8688      if (NOTE_P (i))
8689	{
8690	  switch (NOTE_KIND (i))
8691	    {
8692	    case NOTE_INSN_EH_REGION_BEG:
8693	      exception_nesting++;
8694	      if (trap_pending)
8695		goto close_shadow;
8696	      break;
8697
8698	    case NOTE_INSN_EH_REGION_END:
8699	      exception_nesting--;
8700	      if (trap_pending)
8701		goto close_shadow;
8702	      break;
8703
8704	    case NOTE_INSN_EPILOGUE_BEG:
8705	      if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8706		goto close_shadow;
8707	      break;
8708	    }
8709	}
8710      else if (trap_pending)
8711	{
8712	  if (alpha_tp == ALPHA_TP_FUNC)
8713	    {
8714	      if (JUMP_P (i)
8715		  && GET_CODE (PATTERN (i)) == RETURN)
8716		goto close_shadow;
8717	    }
8718	  else if (alpha_tp == ALPHA_TP_INSN)
8719	    {
8720	      if (optimize > 0)
8721		{
8722		  struct shadow_summary sum;
8723
8724		  sum.used.i = 0;
8725		  sum.used.fp = 0;
8726		  sum.used.mem = 0;
8727		  sum.defd = sum.used;
8728
8729		  switch (GET_CODE (i))
8730		    {
8731		    case INSN:
8732		      /* Annoyingly, get_attr_trap will die on these.  */
8733		      if (GET_CODE (PATTERN (i)) == USE
8734			  || GET_CODE (PATTERN (i)) == CLOBBER)
8735			break;
8736
8737		      summarize_insn (PATTERN (i), &sum, 0);
8738
8739		      if ((sum.defd.i & shadow.defd.i)
8740			  || (sum.defd.fp & shadow.defd.fp))
8741			{
8742			  /* (c) would be violated */
8743			  goto close_shadow;
8744			}
8745
8746		      /* Combine shadow with summary of current insn: */
8747		      shadow.used.i   |= sum.used.i;
8748		      shadow.used.fp  |= sum.used.fp;
8749		      shadow.used.mem |= sum.used.mem;
8750		      shadow.defd.i   |= sum.defd.i;
8751		      shadow.defd.fp  |= sum.defd.fp;
8752		      shadow.defd.mem |= sum.defd.mem;
8753
8754		      if ((sum.defd.i & shadow.used.i)
8755			  || (sum.defd.fp & shadow.used.fp)
8756			  || (sum.defd.mem & shadow.used.mem))
8757			{
8758			  /* (a) would be violated (also takes care of (b))  */
8759			  gcc_assert (get_attr_trap (i) != TRAP_YES
8760				      || (!(sum.defd.i & sum.used.i)
8761					  && !(sum.defd.fp & sum.used.fp)));
8762
8763			  goto close_shadow;
8764			}
8765		      break;
8766
8767		    case BARRIER:
8768		      /* __builtin_unreachable can expand to no code at all,
8769			 leaving (barrier) RTXes in the instruction stream.  */
8770		      goto close_shadow_notrapb;
8771
8772		    case JUMP_INSN:
8773		    case CALL_INSN:
8774		    case CODE_LABEL:
8775		      goto close_shadow;
8776
8777		    case DEBUG_INSN:
8778		      break;
8779
8780		    default:
8781		      gcc_unreachable ();
8782		    }
8783		}
8784	      else
8785		{
8786		close_shadow:
8787		  n = emit_insn_before (gen_trapb (), i);
8788		  PUT_MODE (n, TImode);
8789		  PUT_MODE (i, TImode);
8790		close_shadow_notrapb:
8791		  trap_pending = 0;
8792		  shadow.used.i = 0;
8793		  shadow.used.fp = 0;
8794		  shadow.used.mem = 0;
8795		  shadow.defd = shadow.used;
8796		}
8797	    }
8798	}
8799
8800      if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8801	  && NONJUMP_INSN_P (i)
8802	  && GET_CODE (PATTERN (i)) != USE
8803	  && GET_CODE (PATTERN (i)) != CLOBBER
8804	  && get_attr_trap (i) == TRAP_YES)
8805	{
8806	  if (optimize && !trap_pending)
8807	    summarize_insn (PATTERN (i), &shadow, 0);
8808	  trap_pending = 1;
8809	}
8810    }
8811}
8812
8813/* Alpha can only issue instruction groups simultaneously if they are
8814   suitably aligned.  This is very processor-specific.  */
8815/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8816   that are marked "fake".  These instructions do not exist on that target,
8817   but it is possible to see these insns with deranged combinations of
8818   command-line options, such as "-mtune=ev4 -mmax".  Instead of aborting,
8819   choose a result at random.  */
8820
8821enum alphaev4_pipe {
8822  EV4_STOP = 0,
8823  EV4_IB0 = 1,
8824  EV4_IB1 = 2,
8825  EV4_IBX = 4
8826};
8827
8828enum alphaev5_pipe {
8829  EV5_STOP = 0,
8830  EV5_NONE = 1,
8831  EV5_E01 = 2,
8832  EV5_E0 = 4,
8833  EV5_E1 = 8,
8834  EV5_FAM = 16,
8835  EV5_FA = 32,
8836  EV5_FM = 64
8837};
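/* These values are bit masks; EV4_IBX, EV5_E01 and EV5_FAM mark insns
   that may issue in either of two slots, which the next_group routines
   below exploit when packing groups.  */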
8838
8839static enum alphaev4_pipe
8840alphaev4_insn_pipe (rtx_insn *insn)
8841{
8842  if (recog_memoized (insn) < 0)
8843    return EV4_STOP;
8844  if (get_attr_length (insn) != 4)
8845    return EV4_STOP;
8846
8847  switch (get_attr_type (insn))
8848    {
8849    case TYPE_ILD:
8850    case TYPE_LDSYM:
8851    case TYPE_FLD:
8852    case TYPE_LD_L:
8853      return EV4_IBX;
8854
8855    case TYPE_IADD:
8856    case TYPE_ILOG:
8857    case TYPE_ICMOV:
8858    case TYPE_ICMP:
8859    case TYPE_FST:
8860    case TYPE_SHIFT:
8861    case TYPE_IMUL:
8862    case TYPE_FBR:
8863    case TYPE_MVI:		/* fake */
8864      return EV4_IB0;
8865
8866    case TYPE_IST:
8867    case TYPE_MISC:
8868    case TYPE_IBR:
8869    case TYPE_JSR:
8870    case TYPE_CALLPAL:
8871    case TYPE_FCPYS:
8872    case TYPE_FCMOV:
8873    case TYPE_FADD:
8874    case TYPE_FDIV:
8875    case TYPE_FMUL:
8876    case TYPE_ST_C:
8877    case TYPE_MB:
8878    case TYPE_FSQRT:		/* fake */
8879    case TYPE_FTOI:		/* fake */
8880    case TYPE_ITOF:		/* fake */
8881      return EV4_IB1;
8882
8883    default:
8884      gcc_unreachable ();
8885    }
8886}
8887
8888static enum alphaev5_pipe
8889alphaev5_insn_pipe (rtx_insn *insn)
8890{
8891  if (recog_memoized (insn) < 0)
8892    return EV5_STOP;
8893  if (get_attr_length (insn) != 4)
8894    return EV5_STOP;
8895
8896  switch (get_attr_type (insn))
8897    {
8898    case TYPE_ILD:
8899    case TYPE_FLD:
8900    case TYPE_LDSYM:
8901    case TYPE_IADD:
8902    case TYPE_ILOG:
8903    case TYPE_ICMOV:
8904    case TYPE_ICMP:
8905      return EV5_E01;
8906
8907    case TYPE_IST:
8908    case TYPE_FST:
8909    case TYPE_SHIFT:
8910    case TYPE_IMUL:
8911    case TYPE_MISC:
8912    case TYPE_MVI:
8913    case TYPE_LD_L:
8914    case TYPE_ST_C:
8915    case TYPE_MB:
8916    case TYPE_FTOI:		/* fake */
8917    case TYPE_ITOF:		/* fake */
8918      return EV5_E0;
8919
8920    case TYPE_IBR:
8921    case TYPE_JSR:
8922    case TYPE_CALLPAL:
8923      return EV5_E1;
8924
8925    case TYPE_FCPYS:
8926      return EV5_FAM;
8927
8928    case TYPE_FBR:
8929    case TYPE_FCMOV:
8930    case TYPE_FADD:
8931    case TYPE_FDIV:
8932    case TYPE_FSQRT:		/* fake */
8933      return EV5_FA;
8934
8935    case TYPE_FMUL:
8936      return EV5_FM;
8937
8938    default:
8939      gcc_unreachable ();
8940    }
8941}
8942
8943/* IN_USE is a mask of the slots currently filled within the insn group.
8944   The mask bits come from alphaev4_pipe above.  If EV4_IBX is set, then
8945   the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8946
8947   LEN is, of course, the length of the group in bytes.  */
8948
8949static rtx_insn *
8950alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
8951{
8952  int len, in_use;
8953
8954  len = in_use = 0;
8955
8956  if (! INSN_P (insn)
8957      || GET_CODE (PATTERN (insn)) == CLOBBER
8958      || GET_CODE (PATTERN (insn)) == USE)
8959    goto next_and_done;
8960
8961  while (1)
8962    {
8963      enum alphaev4_pipe pipe;
8964
8965      pipe = alphaev4_insn_pipe (insn);
8966      switch (pipe)
8967	{
8968	case EV4_STOP:
8969	  /* Force complex instructions to start new groups.  */
8970	  if (in_use)
8971	    goto done;
8972
8973	  /* If this is a completely unrecognized insn, it's an asm.
8974	     We don't know how long it is, so record length as -1 to
8975	     signal a needed realignment.  */
8976	  if (recog_memoized (insn) < 0)
8977	    len = -1;
8978	  else
8979	    len = get_attr_length (insn);
8980	  goto next_and_done;
8981
8982	case EV4_IBX:
8983	  if (in_use & EV4_IB0)
8984	    {
8985	      if (in_use & EV4_IB1)
8986		goto done;
8987	      in_use |= EV4_IB1;
8988	    }
8989	  else
8990	    in_use |= EV4_IB0 | EV4_IBX;
8991	  break;
8992
8993	case EV4_IB0:
8994	  if (in_use & EV4_IB0)
8995	    {
8996	      if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8997		goto done;
8998	      in_use |= EV4_IB1;
8999	    }
9000	  in_use |= EV4_IB0;
9001	  break;
9002
9003	case EV4_IB1:
9004	  if (in_use & EV4_IB1)
9005	    goto done;
9006	  in_use |= EV4_IB1;
9007	  break;
9008
9009	default:
9010	  gcc_unreachable ();
9011	}
9012      len += 4;
9013
9014      /* Haifa doesn't do well scheduling branches.  */
9015      if (JUMP_P (insn))
9016	goto next_and_done;
9017
9018    next:
9019      insn = next_nonnote_insn (insn);
9020
9021      if (!insn || ! INSN_P (insn))
9022	goto done;
9023
9024      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9025      if (GET_MODE (insn) == TImode)
9026	goto done;
9027
9028      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9029	goto next;
9030    }
9031
9032 next_and_done:
9033  insn = next_nonnote_insn (insn);
9034
9035 done:
9036  *plen = len;
9037  *pin_use = in_use;
9038  return insn;
9039}
9040
9041/* IN_USE is a mask of the slots currently filled within the insn group.
9042   The mask bits come from alphaev5_pipe above.  If EV5_E01 is set, then
9043   the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9044
9045   LEN is, of course, the length of the group in bytes.  */
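/* EV5 can issue two integer insns (pipes E0 and E1) and two floating-point
   insns (pipes FA and FM) per cycle, so a full group here is four 4-byte
   insns; EV5_E01 and EV5_FAM play the same "either slot" role for EV5 that
   EV4_IBX plays for EV4 above.  */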
9046
9047static rtx_insn *
9048alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9049{
9050  int len, in_use;
9051
9052  len = in_use = 0;
9053
9054  if (! INSN_P (insn)
9055      || GET_CODE (PATTERN (insn)) == CLOBBER
9056      || GET_CODE (PATTERN (insn)) == USE)
9057    goto next_and_done;
9058
9059  while (1)
9060    {
9061      enum alphaev5_pipe pipe;
9062
9063      pipe = alphaev5_insn_pipe (insn);
9064      switch (pipe)
9065	{
9066	case EV5_STOP:
9067	  /* Force complex instructions to start new groups.  */
9068	  if (in_use)
9069	    goto done;
9070
9071	  /* If this is a completely unrecognized insn, it's an asm.
9072	     We don't know how long it is, so record length as -1 to
9073	     signal a needed realignment.  */
9074	  if (recog_memoized (insn) < 0)
9075	    len = -1;
9076	  else
9077	    len = get_attr_length (insn);
9078	  goto next_and_done;
9079
	/* ??? At most of the places below we would like to assert that
	   this never happens, as it would indicate an error either in
	   Haifa or in the scheduling description.  Unfortunately, Haifa
	   never schedules the last instruction of the BB, so we don't
	   have an accurate TI bit to go off of.  */
9085	case EV5_E01:
9086	  if (in_use & EV5_E0)
9087	    {
9088	      if (in_use & EV5_E1)
9089		goto done;
9090	      in_use |= EV5_E1;
9091	    }
9092	  else
9093	    in_use |= EV5_E0 | EV5_E01;
9094	  break;
9095
9096	case EV5_E0:
9097	  if (in_use & EV5_E0)
9098	    {
9099	      if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9100		goto done;
9101	      in_use |= EV5_E1;
9102	    }
9103	  in_use |= EV5_E0;
9104	  break;
9105
9106	case EV5_E1:
9107	  if (in_use & EV5_E1)
9108	    goto done;
9109	  in_use |= EV5_E1;
9110	  break;
9111
9112	case EV5_FAM:
9113	  if (in_use & EV5_FA)
9114	    {
9115	      if (in_use & EV5_FM)
9116		goto done;
9117	      in_use |= EV5_FM;
9118	    }
9119	  else
9120	    in_use |= EV5_FA | EV5_FAM;
9121	  break;
9122
9123	case EV5_FA:
9124	  if (in_use & EV5_FA)
9125	    goto done;
9126	  in_use |= EV5_FA;
9127	  break;
9128
9129	case EV5_FM:
9130	  if (in_use & EV5_FM)
9131	    goto done;
9132	  in_use |= EV5_FM;
9133	  break;
9134
9135	case EV5_NONE:
9136	  break;
9137
9138	default:
9139	  gcc_unreachable ();
9140	}
9141      len += 4;
9142
9143      /* Haifa doesn't do well scheduling branches.  */
9144      /* ??? If this is predicted not-taken, slotting continues, except
9145	 that no more IBR, FBR, or JSR insns may be slotted.  */
9146      if (JUMP_P (insn))
9147	goto next_and_done;
9148
9149    next:
9150      insn = next_nonnote_insn (insn);
9151
9152      if (!insn || ! INSN_P (insn))
9153	goto done;
9154
9155      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9156      if (GET_MODE (insn) == TImode)
9157	goto done;
9158
9159      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9160	goto next;
9161    }
9162
9163 next_and_done:
9164  insn = next_nonnote_insn (insn);
9165
9166 done:
9167  *plen = len;
9168  *pin_use = in_use;
9169  return insn;
9170}
9171
9172static rtx
9173alphaev4_next_nop (int *pin_use)
9174{
9175  int in_use = *pin_use;
9176  rtx nop;
9177
9178  if (!(in_use & EV4_IB0))
9179    {
9180      in_use |= EV4_IB0;
9181      nop = gen_nop ();
9182    }
9183  else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9184    {
9185      in_use |= EV4_IB1;
9186      nop = gen_nop ();
9187    }
9188  else if (TARGET_FP && !(in_use & EV4_IB1))
9189    {
9190      in_use |= EV4_IB1;
9191      nop = gen_fnop ();
9192    }
9193  else
9194    nop = gen_unop ();
9195
9196  *pin_use = in_use;
9197  return nop;
9198}
9199
9200static rtx
9201alphaev5_next_nop (int *pin_use)
9202{
9203  int in_use = *pin_use;
9204  rtx nop;
9205
9206  if (!(in_use & EV5_E1))
9207    {
9208      in_use |= EV5_E1;
9209      nop = gen_nop ();
9210    }
9211  else if (TARGET_FP && !(in_use & EV5_FA))
9212    {
9213      in_use |= EV5_FA;
9214      nop = gen_fnop ();
9215    }
9216  else if (TARGET_FP && !(in_use & EV5_FM))
9217    {
9218      in_use |= EV5_FM;
9219      nop = gen_fnop ();
9220    }
9221  else
9222    nop = gen_unop ();
9223
9224  *pin_use = in_use;
9225  return nop;
9226}
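/* Both nop selectors above fill whatever slot is still free: an integer
   nop when an integer slot is open, fnop (only when FP insns are enabled)
   for a free floating-point slot, and otherwise unop, the conventional
   load-format no-op.  The exact encodings come from the nop, fnop and
   unop patterns in alpha.md.  */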
9227
9228/* The instruction group alignment main loop.  */
9229
9230static void
9231alpha_align_insns_1 (unsigned int max_align,
9232		     rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9233		     rtx (*next_nop) (int *))
9234{
9235  /* ALIGN is the known alignment for the insn group.  */
9236  unsigned int align;
9237  /* OFS is the offset of the current insn in the insn group.  */
9238  int ofs;
9239  int prev_in_use, in_use, len, ldgp;
9240  rtx_insn *i, *next;
9241
9242  /* Let shorten branches care for assigning alignments to code labels.  */
9243  shorten_branches (get_insns ());
9244
9245  unsigned int option_alignment = align_functions.levels[0].get_value ();
9246  if (option_alignment < 4)
9247    align = 4;
9248  else if ((unsigned int) option_alignment < max_align)
9249    align = option_alignment;
9250  else
9251    align = max_align;
9252
9253  ofs = prev_in_use = 0;
9254  i = get_insns ();
9255  if (NOTE_P (i))
9256    i = next_nonnote_insn (i);
9257
9258  ldgp = alpha_function_needs_gp ? 8 : 0;
9259
9260  while (i)
9261    {
9262      next = (*next_group) (i, &in_use, &len);
9263
9264      /* When we see a label, resync alignment etc.  */
9265      if (LABEL_P (i))
9266	{
9267	  unsigned int new_align
9268	    = label_to_alignment (i).levels[0].get_value ();
9269
9270	  if (new_align >= align)
9271	    {
9272	      align = new_align < max_align ? new_align : max_align;
9273	      ofs = 0;
9274	    }
9275
9276	  else if (ofs & (new_align-1))
9277	    ofs = (ofs | (new_align-1)) + 1;
9278	  gcc_assert (!len);
9279	}
9280
      /* Handle complex instructions specially.  */
9282      else if (in_use == 0)
9283	{
9284	  /* Asms will have length < 0.  This is a signal that we have
9285	     lost alignment knowledge.  Assume, however, that the asm
9286	     will not mis-align instructions.  */
9287	  if (len < 0)
9288	    {
9289	      ofs = 0;
9290	      align = 4;
9291	      len = 0;
9292	    }
9293	}
9294
9295      /* If the known alignment is smaller than the recognized insn group,
9296	 realign the output.  */
9297      else if ((int) align < len)
9298	{
9299	  unsigned int new_log_align = len > 8 ? 4 : 3;
9300	  rtx_insn *prev, *where;
9301
9302	  where = prev = prev_nonnote_insn (i);
9303	  if (!where || !LABEL_P (where))
9304	    where = i;
9305
9306	  /* Can't realign between a call and its gp reload.  */
9307	  if (! (TARGET_EXPLICIT_RELOCS
9308		 && prev && CALL_P (prev)))
9309	    {
9310	      emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9311	      align = 1 << new_log_align;
9312	      ofs = 0;
9313	    }
9314	}
9315
9316      /* We may not insert padding inside the initial ldgp sequence.  */
9317      else if (ldgp > 0)
9318	ldgp -= len;
9319
      /* If the group won't fit in the same aligned block (INT16) as the
	 previous group, we need to add padding to keep the group
	 together.  Rather than simply leaving the insn filling to the
	 assembler, we can make use of the knowledge of what sorts of
	 instructions were issued in the previous group to make sure that
	 all of the added nops are really free.  */
9326      else if (ofs + len > (int) align)
9327	{
9328	  int nop_count = (align - ofs) / 4;
9329	  rtx_insn *where;
9330
9331	  /* Insert nops before labels, branches, and calls to truly merge
9332	     the execution of the nops with the previous instruction group.  */
9333	  where = prev_nonnote_insn (i);
9334	  if (where)
9335	    {
9336	      if (LABEL_P (where))
9337		{
9338		  rtx_insn *where2 = prev_nonnote_insn (where);
9339		  if (where2 && JUMP_P (where2))
9340		    where = where2;
9341		}
9342	      else if (NONJUMP_INSN_P (where))
9343		where = i;
9344	    }
9345	  else
9346	    where = i;
9347
9348	  do
9349	    emit_insn_before ((*next_nop)(&prev_in_use), where);
9350	  while (--nop_count);
9351	  ofs = 0;
9352	}
9353
9354      ofs = (ofs + len) & (align - 1);
9355      prev_in_use = in_use;
9356      i = next;
9357    }
9358}
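/* A worked example of the padding arithmetic above: with ALIGN == 16 and
   OFS == 12, a recognized 8-byte group would straddle the 16-byte
   boundary, so nop_count = (16 - 12) / 4 == 1 and a single nop is emitted
   before the group; OFS restarts at 0 and becomes (0 + 8) & 15 == 8 once
   the group itself is accounted for.  */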
9359
9360static void
9361alpha_align_insns (void)
9362{
9363  if (alpha_tune == PROCESSOR_EV4)
9364    alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9365  else if (alpha_tune == PROCESSOR_EV5)
9366    alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9367  else
9368    gcc_unreachable ();
9369}
9370
/* Insert an unop between a sibcall or noreturn function call and the
   following GP load.  */
9372
9373static void
9374alpha_pad_function_end (void)
9375{
9376  rtx_insn *insn, *next;
9377
9378  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9379    {
9380      if (!CALL_P (insn)
9381	  || !(SIBLING_CALL_P (insn)
9382	       || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9383        continue;
9384
9385      next = next_active_insn (insn);
9386      if (next)
9387	{
9388	  rtx pat = PATTERN (next);
9389
9390	  if (GET_CODE (pat) == SET
9391	      && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9392	      && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9393	    emit_insn_after (gen_unop (), insn);
9394	}
9395    }
9396}
9397
9398/* Machine dependent reorg pass.  */
9399
9400static void
9401alpha_reorg (void)
9402{
9403  /* Workaround for a linker error that triggers when an exception
     handler immediately follows a sibcall or a noreturn function.
9405
     In the sibcall case:
9407
9408     The instruction stream from an object file:
9409
9410 1d8:   00 00 fb 6b     jmp     (t12)
9411 1dc:   00 00 ba 27     ldah    gp,0(ra)
9412 1e0:   00 00 bd 23     lda     gp,0(gp)
9413 1e4:   00 00 7d a7     ldq     t12,0(gp)
9414 1e8:   00 40 5b 6b     jsr     ra,(t12),1ec <__funcZ+0x1ec>
9415
9416     was converted in the final link pass to:
9417
9418   12003aa88:   67 fa ff c3     br      120039428 <...>
9419   12003aa8c:   00 00 fe 2f     unop
9420   12003aa90:   00 00 fe 2f     unop
9421   12003aa94:   48 83 7d a7     ldq     t12,-31928(gp)
9422   12003aa98:   00 40 5b 6b     jsr     ra,(t12),12003aa9c <__func+0x1ec>
9423
     And in the noreturn case:
9425
9426     The instruction stream from an object file:
9427
9428  54:   00 40 5b 6b     jsr     ra,(t12),58 <__func+0x58>
9429  58:   00 00 ba 27     ldah    gp,0(ra)
9430  5c:   00 00 bd 23     lda     gp,0(gp)
9431  60:   00 00 7d a7     ldq     t12,0(gp)
9432  64:   00 40 5b 6b     jsr     ra,(t12),68 <__func+0x68>
9433
9434     was converted in the final link pass to:
9435
9436   fdb24:       a0 03 40 d3     bsr     ra,fe9a8 <_called_func+0x8>
9437   fdb28:       00 00 fe 2f     unop
9438   fdb2c:       00 00 fe 2f     unop
9439   fdb30:       30 82 7d a7     ldq     t12,-32208(gp)
9440   fdb34:       00 40 5b 6b     jsr     ra,(t12),fdb38 <__func+0x68>
9441
9442     GP load instructions were wrongly cleared by the linker relaxation
9443     pass.  This workaround prevents removal of GP loads by inserting
     an unop instruction between a sibcall or noreturn function call and
     the exception handler prologue.  */
9446
9447  if (current_function_has_exception_handlers ())
9448    alpha_pad_function_end ();
9449
  /* The CALL_PAL that implements the trap insn updates the program
     counter to point after the insn.  If the trap is the last insn in
     the function, emit a NOP to guarantee that the PC remains inside the
     function boundaries.  This workaround is needed to get reliable
     backtraces.  */
9454
9455  rtx_insn *insn = prev_active_insn (get_last_insn ());
9456
9457  if (insn && NONJUMP_INSN_P (insn))
9458    {
9459      rtx pat = PATTERN (insn);
9460      if (GET_CODE (pat) == PARALLEL)
9461	{
9462	  rtx vec = XVECEXP (pat, 0, 0);
9463	  if (GET_CODE (vec) == TRAP_IF
9464	      && XEXP (vec, 0) == const1_rtx)
9465	    emit_insn_after (gen_unop (), insn);
9466	}
9467    }
9468}
9469
9470static void
9471alpha_file_start (void)
9472{
9473  default_file_start ();
9474
9475  fputs ("\t.set noreorder\n", asm_out_file);
9476  fputs ("\t.set volatile\n", asm_out_file);
9477  if (TARGET_ABI_OSF)
9478    fputs ("\t.set noat\n", asm_out_file);
9479  if (TARGET_EXPLICIT_RELOCS)
9480    fputs ("\t.set nomacro\n", asm_out_file);
9481  if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9482    {
9483      const char *arch;
9484
9485      if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9486	arch = "ev6";
9487      else if (TARGET_MAX)
9488	arch = "pca56";
9489      else if (TARGET_BWX)
9490	arch = "ev56";
9491      else if (alpha_cpu == PROCESSOR_EV5)
9492	arch = "ev5";
9493      else
9494	arch = "ev4";
9495
9496      fprintf (asm_out_file, "\t.arch %s\n", arch);
9497    }
9498}
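/* With the default OSF/1 ABI and, say, -mcpu=ev6, the above typically
   produces something like

	.set noreorder
	.set volatile
	.set noat
	.set nomacro
	.arch ev6

   though the exact directives depend on the selected options.  */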
9499
9500/* Since we don't have a .dynbss section, we should not allow global
9501   relocations in the .rodata section.  */
9502
9503static int
9504alpha_elf_reloc_rw_mask (void)
9505{
9506  return flag_pic ? 3 : 2;
9507}
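/* The value is a mask over relocation kinds that force data into a
   writable section: bit 0 covers relocations against local symbols and
   bit 1 those against global symbols.  Returning 2 therefore lets
   locally resolved relocations stay in .rodata, while under -fPIC (mask
   3) any relocation forces a writable section.  This is a summary of the
   hook's contract as we understand it.  */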
9508
9509/* Return a section for X.  The only special thing we do here is to
9510   honor small data.  */
9511
9512static section *
9513alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9514			      unsigned HOST_WIDE_INT align)
9515{
9516  if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9517    /* ??? Consider using mergeable sdata sections.  */
9518    return sdata_section;
9519  else
9520    return default_elf_select_rtx_section (mode, x, align);
9521}
9522
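/* Implement TARGET_SECTION_TYPE_FLAGS.  Mark the small-data sections
   (.sdata, .sbss and their variants) with SECTION_SMALL on top of the
   default flags.  */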
9523static unsigned int
9524alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9525{
9526  unsigned int flags = 0;
9527
9528  if (strcmp (name, ".sdata") == 0
9529      || startswith (name, ".sdata.")
9530      || startswith (name, ".gnu.linkonce.s.")
9531      || strcmp (name, ".sbss") == 0
9532      || startswith (name, ".sbss.")
9533      || startswith (name, ".gnu.linkonce.sb."))
9534    flags = SECTION_SMALL;
9535
9536  flags |= default_section_type_flags (decl, name, reloc);
9537  return flags;
9538}
9539
/* Structure to collect function names for final output in the link
   section.  */
9541/* Note that items marked with GTY can't be ifdef'ed out.  */
9542
9543enum reloc_kind
9544{
9545  KIND_LINKAGE,
9546  KIND_CODEADDR
9547};
9548
9549struct GTY(()) alpha_links
9550{
9551  rtx func;
9552  rtx linkage;
9553  enum reloc_kind rkind;
9554};
9555
9556#if TARGET_ABI_OPEN_VMS
9557
9558/* Return the VMS argument type corresponding to MODE.  */
9559
9560enum avms_arg_type
9561alpha_arg_type (machine_mode mode)
9562{
9563  switch (mode)
9564    {
9565    case E_SFmode:
9566      return TARGET_FLOAT_VAX ? FF : FS;
9567    case E_DFmode:
9568      return TARGET_FLOAT_VAX ? FD : FT;
9569    default:
9570      return I64;
9571    }
9572}
9573
9574/* Return an rtx for an integer representing the VMS Argument Information
9575   register value.  */
9576
9577rtx
9578alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9579{
9580  unsigned HOST_WIDE_INT regval = cum.num_args;
9581  int i;
9582
9583  for (i = 0; i < 6; i++)
9584    regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9585
9586  return GEN_INT (regval);
9587}
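/* Illustrative layout of the value built above: the argument count sits
   in the low 8 bits, and the 3-bit VMS type code of argument I occupies
   bits 8 + 3*I .. 10 + 3*I for each of the first six arguments.  */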
9588
9589
9590/* Return a SYMBOL_REF representing the reference to the .linkage entry
9591   of function FUNC built for calls made from CFUNDECL.  LFLAG is 1 if
9592   this is the reference to the linkage pointer value, 0 if this is the
   reference to the function entry value.  RFLAG is 1 if this is a reduced
9594   reference (code address only), 0 if this is a full reference.  */
9595
9596rtx
9597alpha_use_linkage (rtx func, bool lflag, bool rflag)
9598{
9599  struct alpha_links *al = NULL;
9600  const char *name = XSTR (func, 0);
9601
9602  if (cfun->machine->links)
9603    {
9604      /* Is this name already defined?  */
9605      alpha_links **slot = cfun->machine->links->get (name);
9606      if (slot)
9607	al = *slot;
9608    }
9609  else
9610    cfun->machine->links
9611      = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64);
9612
9613  if (al == NULL)
9614    {
9615      size_t buf_len;
9616      char *linksym;
9617      tree id;
9618
9619      if (name[0] == '*')
9620	name++;
9621
9622      /* Follow transparent alias, as this is used for CRTL translations.  */
9623      id = maybe_get_identifier (name);
9624      if (id)
9625        {
9626          while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9627            id = TREE_CHAIN (id);
9628          name = IDENTIFIER_POINTER (id);
9629        }
9630
9631      buf_len = strlen (name) + 8 + 9;
9632      linksym = (char *) alloca (buf_len);
9633      snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9634
9635      al = ggc_alloc<alpha_links> ();
9636      al->func = func;
9637      al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9638
9639      cfun->machine->links->put (ggc_strdup (name), al);
9640    }
9641
9642  al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9643
9644  if (lflag)
9645    return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9646  else
9647    return al->linkage;
9648}
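/* The linkage symbol created above has the form "$N..NAME..lk", where N
   is the containing function's funcdef_no.  When LFLAG is set, the +8
   offset selects the second quadword of the two-quadword linkage entry
   written out by alpha_write_one_linkage below.  */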
9649
9650static int
9651alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9652{
9653  ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9654  if (link->rkind == KIND_CODEADDR)
9655    {
9656      /* External and used, request code address.  */
9657      fprintf (stream, "\t.code_address ");
9658    }
9659  else
9660    {
9661      if (!SYMBOL_REF_EXTERNAL_P (link->func)
9662          && SYMBOL_REF_LOCAL_P (link->func))
9663	{
9664	  /* Locally defined, build linkage pair.  */
9665	  fprintf (stream, "\t.quad %s..en\n", name);
9666	  fprintf (stream, "\t.quad ");
9667	}
9668      else
9669	{
9670	  /* External, request linkage pair.  */
9671	  fprintf (stream, "\t.linkage ");
9672	}
9673    }
9674  assemble_name (stream, name);
9675  fputs ("\n", stream);
9676
9677  return 0;
9678}
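/* For a locally defined function FOO the routine above emits roughly

	$N..FOO..lk:
		.quad FOO..en
		.quad FOO

   using the linkage symbol created in alpha_use_linkage, while external
   functions get ".linkage FOO" (or ".code_address FOO" for KIND_CODEADDR)
   so that the linker builds the entry.  */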
9679
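/* Emit the .link section for the current function: its procedure
   descriptor (via .pdesc) followed by every linkage entry collected in
   cfun->machine->links.  */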
9680static void
9681alpha_write_linkage (FILE *stream, const char *funname)
9682{
9683  fprintf (stream, "\t.link\n");
9684  fprintf (stream, "\t.align 3\n");
9685  in_section = NULL;
9686
9687#ifdef TARGET_VMS_CRASH_DEBUG
9688  fputs ("\t.name ", stream);
9689  assemble_name (stream, funname);
9690  fputs ("..na\n", stream);
9691#endif
9692
9693  ASM_OUTPUT_LABEL (stream, funname);
9694  fprintf (stream, "\t.pdesc ");
9695  assemble_name (stream, funname);
9696  fprintf (stream, "..en,%s\n",
9697	   alpha_procedure_type == PT_STACK ? "stack"
9698	   : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9699
9700  if (cfun->machine->links)
9701    {
9702      hash_map<nofree_string_hash, alpha_links *>::iterator iter
9703	= cfun->machine->links->begin ();
9704      for (; iter != cfun->machine->links->end (); ++iter)
9705	alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9706    }
9707}
9708
9709/* Switch to an arbitrary section NAME with attributes as specified
9710   by FLAGS.  ALIGN specifies any known alignment requirements for
9711   the section; 0 if the default should be used.  */
9712
9713static void
9714vms_asm_named_section (const char *name, unsigned int flags,
9715		       tree decl ATTRIBUTE_UNUSED)
9716{
9717  fputc ('\n', asm_out_file);
9718  fprintf (asm_out_file, ".section\t%s", name);
9719
9720  if (flags & SECTION_DEBUG)
9721    fprintf (asm_out_file, ",NOWRT");
9722
9723  fputc ('\n', asm_out_file);
9724}
9725
9726/* Record an element in the table of global constructors.  SYMBOL is
9727   a SYMBOL_REF of the function to be called; PRIORITY is a number
9728   between 0 and MAX_INIT_PRIORITY.
9729
9730   Differs from default_ctors_section_asm_out_constructor in that the
9731   width of the .ctors entry is always 64 bits, rather than the 32 bits
9732   used by a normal pointer.  */
9733
9734static void
9735vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9736{
9737  switch_to_section (ctors_section);
9738  assemble_align (BITS_PER_WORD);
9739  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9740}
9741
9742static void
9743vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9744{
9745  switch_to_section (dtors_section);
9746  assemble_align (BITS_PER_WORD);
9747  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9748}
9749#else
9750rtx
9751alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9752		   bool lflag ATTRIBUTE_UNUSED,
9753		   bool rflag ATTRIBUTE_UNUSED)
9754{
9755  return NULL_RTX;
9756}
9757
9758#endif /* TARGET_ABI_OPEN_VMS */
9759
9760static void
9761alpha_init_libfuncs (void)
9762{
9763  if (TARGET_ABI_OPEN_VMS)
9764    {
9765      /* Use the VMS runtime library functions for division and
9766	 remainder.  */
9767      set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9768      set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9769      set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9770      set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9771      set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9772      set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9773      set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9774      set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9775#ifdef MEM_LIBFUNCS_INIT
9776      MEM_LIBFUNCS_INIT;
9777#endif
9778    }
9779}
9780
9781/* On the Alpha, we use this to disable the floating-point registers
9782   when they don't exist.  */
9783
9784static void
9785alpha_conditional_register_usage (void)
9786{
9787  int i;
9788  if (! TARGET_FPREGS)
9789    for (i = 32; i < 63; i++)
9790      fixed_regs[i] = call_used_regs[i] = 1;
9791}
9792
9793/* Canonicalize a comparison from one we don't have to one we do have.  */
9794
9795static void
9796alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9797			       bool op0_preserve_value)
9798{
9799  if (!op0_preserve_value
9800      && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9801      && (REG_P (*op1) || *op1 == const0_rtx))
9802    {
9803      std::swap (*op0, *op1);
9804      *code = (int)swap_condition ((enum rtx_code)*code);
9805    }
9806
9807  if ((*code == LT || *code == LTU)
9808      && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9809    {
9810      *code = *code == LT ? LE : LEU;
9811      *op1 = GEN_INT (255);
9812    }
9813}
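/* Examples of the canonicalization above: "GT x, y" becomes "LT y, x"
   when the value of x need not be preserved, and "x < 256" becomes
   "x <= 255" so that the constant fits in the 8-bit literal field of the
   Alpha compare instructions.  */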
9814
9815/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
9816
9817static void
9818alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9819{
9820  const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9821
9822  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9823  tree new_fenv_var, reload_fenv, restore_fnenv;
9824  tree update_call, atomic_feraiseexcept, hold_fnclex;
9825
9826  /* Assume OSF/1 compatible interfaces.  */
9827  if (!TARGET_ABI_OSF)
9828    return;
9829
  /* Generate the equivalent of:
9831       unsigned long fenv_var;
9832       fenv_var = __ieee_get_fp_control ();
9833
9834       unsigned long masked_fenv;
9835       masked_fenv = fenv_var & mask;
9836
9837       __ieee_set_fp_control (masked_fenv);  */
9838
9839  fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9840  get_fpscr
9841    = build_fn_decl ("__ieee_get_fp_control",
9842		     build_function_type_list (long_unsigned_type_node, NULL));
9843  set_fpscr
9844    = build_fn_decl ("__ieee_set_fp_control",
9845		     build_function_type_list (void_type_node, NULL));
9846  mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9847  ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var,
9848		    build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9849  masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9850  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9851  *hold = build2 (COMPOUND_EXPR, void_type_node,
9852		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9853		  hold_fnclex);
9854
9855  /* Store the value of masked_fenv to clear the exceptions:
9856     __ieee_set_fp_control (masked_fenv);  */
9857
9858  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9859
  /* Generate the equivalent of:
9861       unsigned long new_fenv_var;
9862       new_fenv_var = __ieee_get_fp_control ();
9863
9864       __ieee_set_fp_control (fenv_var);
9865
9866       __atomic_feraiseexcept (new_fenv_var);  */
9867
9868  new_fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9869  reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var,
9870			build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9871  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9872  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9873  update_call
9874    = build_call_expr (atomic_feraiseexcept, 1,
9875		       fold_convert (integer_type_node, new_fenv_var));
9876  *update = build2 (COMPOUND_EXPR, void_type_node,
9877		    build2 (COMPOUND_EXPR, void_type_node,
9878			    reload_fenv, restore_fnenv), update_call);
9879}
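/* SWCR_STATUS_MASK above selects the six accumulated exception status
   bits (bits 17..22) of the software FP control word returned by
   __ieee_get_fp_control: the hold sequence saves the control word and
   writes it back with those bits cleared, the clear sequence repeats
   that write, and the update sequence restores the saved word and
   re-raises whatever accumulated in the meantime via
   __atomic_feraiseexcept.  */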
9880
9881/* Implement TARGET_HARD_REGNO_MODE_OK.  On Alpha, the integer registers
9882   can hold any mode.  The floating-point registers can hold 64-bit
9883   integers as well, but not smaller values.  */
9884
9885static bool
9886alpha_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9887{
9888  if (IN_RANGE (regno, 32, 62))
9889    return (mode == SFmode
9890	    || mode == DFmode
9891	    || mode == DImode
9892	    || mode == SCmode
9893	    || mode == DCmode);
9894  return true;
9895}
9896
/* Implement TARGET_MODES_TIEABLE_P.  The test is asymmetric: it yields
   false exactly when MODE1 could be put in an FP register but MODE2
   could not.  */
9899
9900static bool
9901alpha_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9902{
9903  return (alpha_hard_regno_mode_ok (32, mode1)
9904	  ? alpha_hard_regno_mode_ok (32, mode2)
9905	  : true);
9906}
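/* For example, (DImode, DFmode) is tieable since both can live in an FP
   register, and (SImode, anything) is tieable because SImode can never be
   put in an FP register to begin with; but (SFmode, SImode) is not, which
   is exactly the asymmetric case described above.  */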
9907
9908/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
9909
9910static bool
9911alpha_can_change_mode_class (machine_mode from, machine_mode to,
9912			     reg_class_t rclass)
9913{
9914  return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9915	  || !reg_classes_intersect_p (FLOAT_REGS, rclass));
9916}
9917
9918/* Initialize the GCC target structure.  */
9919#if TARGET_ABI_OPEN_VMS
9920# undef TARGET_ATTRIBUTE_TABLE
9921# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9922# undef TARGET_CAN_ELIMINATE
9923# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9924#endif
9925
9926#undef TARGET_IN_SMALL_DATA_P
9927#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9928
9929#undef TARGET_ASM_ALIGNED_HI_OP
9930#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9931#undef TARGET_ASM_ALIGNED_DI_OP
9932#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9933
9934/* Default unaligned ops are provided for ELF systems.  To get unaligned
9935   data for non-ELF systems, we have to turn off auto alignment.  */
9936#if TARGET_ABI_OPEN_VMS
9937#undef TARGET_ASM_UNALIGNED_HI_OP
9938#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9939#undef TARGET_ASM_UNALIGNED_SI_OP
9940#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9941#undef TARGET_ASM_UNALIGNED_DI_OP
9942#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9943#endif
9944
9945#undef  TARGET_ASM_RELOC_RW_MASK
9946#define TARGET_ASM_RELOC_RW_MASK  alpha_elf_reloc_rw_mask
9947#undef	TARGET_ASM_SELECT_RTX_SECTION
9948#define	TARGET_ASM_SELECT_RTX_SECTION  alpha_elf_select_rtx_section
9949#undef  TARGET_SECTION_TYPE_FLAGS
9950#define TARGET_SECTION_TYPE_FLAGS  alpha_elf_section_type_flags
9951
9952#undef TARGET_ASM_FUNCTION_END_PROLOGUE
9953#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9954
9955#undef TARGET_INIT_LIBFUNCS
9956#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9957
9958#undef TARGET_LEGITIMIZE_ADDRESS
9959#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9960#undef TARGET_MODE_DEPENDENT_ADDRESS_P
9961#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9962
9963#undef TARGET_ASM_FILE_START
9964#define TARGET_ASM_FILE_START alpha_file_start
9965
9966#undef TARGET_SCHED_ADJUST_COST
9967#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9968#undef TARGET_SCHED_ISSUE_RATE
9969#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9970#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9971#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9972  alpha_multipass_dfa_lookahead
9973
9974#undef TARGET_HAVE_TLS
9975#define TARGET_HAVE_TLS HAVE_AS_TLS
9976
9977#undef  TARGET_BUILTIN_DECL
9978#define TARGET_BUILTIN_DECL  alpha_builtin_decl
9979#undef  TARGET_INIT_BUILTINS
9980#define TARGET_INIT_BUILTINS alpha_init_builtins
9981#undef  TARGET_EXPAND_BUILTIN
9982#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9983#undef  TARGET_FOLD_BUILTIN
9984#define TARGET_FOLD_BUILTIN alpha_fold_builtin
9985#undef  TARGET_GIMPLE_FOLD_BUILTIN
9986#define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9987
9988#undef TARGET_FUNCTION_OK_FOR_SIBCALL
9989#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9990#undef TARGET_CANNOT_COPY_INSN_P
9991#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9992#undef TARGET_LEGITIMATE_CONSTANT_P
9993#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9994#undef TARGET_CANNOT_FORCE_CONST_MEM
9995#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9996
9997#if TARGET_ABI_OSF
9998#undef TARGET_ASM_OUTPUT_MI_THUNK
9999#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10000#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10001#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10002#undef TARGET_STDARG_OPTIMIZE_HOOK
10003#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10004#endif
10005
10006#undef TARGET_PRINT_OPERAND
10007#define TARGET_PRINT_OPERAND alpha_print_operand
10008#undef TARGET_PRINT_OPERAND_ADDRESS
10009#define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
10010#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10011#define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p
10012
/* Use 16-bit section anchors, matching the signed 16-bit displacement
   field of Alpha memory instructions.  */
10014#undef TARGET_MIN_ANCHOR_OFFSET
10015#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10016#undef TARGET_MAX_ANCHOR_OFFSET
10017#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
10018#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10019#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10020
10021#undef TARGET_REGISTER_MOVE_COST
10022#define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
10023#undef TARGET_MEMORY_MOVE_COST
10024#define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
10025#undef TARGET_RTX_COSTS
10026#define TARGET_RTX_COSTS alpha_rtx_costs
10027#undef TARGET_ADDRESS_COST
10028#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10029
10030#undef TARGET_MACHINE_DEPENDENT_REORG
10031#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10032
10033#undef TARGET_PROMOTE_FUNCTION_MODE
10034#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10035#undef TARGET_PROMOTE_PROTOTYPES
10036#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10037
10038#undef TARGET_FUNCTION_VALUE
10039#define TARGET_FUNCTION_VALUE alpha_function_value
10040#undef TARGET_LIBCALL_VALUE
10041#define TARGET_LIBCALL_VALUE alpha_libcall_value
10042#undef TARGET_FUNCTION_VALUE_REGNO_P
10043#define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
10044#undef TARGET_RETURN_IN_MEMORY
10045#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10046#undef TARGET_PASS_BY_REFERENCE
10047#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10048#undef TARGET_SETUP_INCOMING_VARARGS
10049#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10050#undef TARGET_STRICT_ARGUMENT_NAMING
10051#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10052#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10053#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10054#undef TARGET_SPLIT_COMPLEX_ARG
10055#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10056#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10057#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10058#undef TARGET_ARG_PARTIAL_BYTES
10059#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10060#undef TARGET_FUNCTION_ARG
10061#define TARGET_FUNCTION_ARG alpha_function_arg
10062#undef TARGET_FUNCTION_ARG_ADVANCE
10063#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10064#undef TARGET_TRAMPOLINE_INIT
10065#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10066
10067#undef TARGET_INSTANTIATE_DECLS
10068#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10069
10070#undef TARGET_SECONDARY_RELOAD
10071#define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10072#undef TARGET_SECONDARY_MEMORY_NEEDED
10073#define TARGET_SECONDARY_MEMORY_NEEDED alpha_secondary_memory_needed
10074#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
10075#define TARGET_SECONDARY_MEMORY_NEEDED_MODE alpha_secondary_memory_needed_mode
10076
10077#undef TARGET_SCALAR_MODE_SUPPORTED_P
10078#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10079#undef TARGET_VECTOR_MODE_SUPPORTED_P
10080#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10081
10082#undef TARGET_BUILD_BUILTIN_VA_LIST
10083#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10084
10085#undef TARGET_EXPAND_BUILTIN_VA_START
10086#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10087
10088#undef TARGET_OPTION_OVERRIDE
10089#define TARGET_OPTION_OVERRIDE alpha_option_override
10090
10091#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10092#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10093  alpha_override_options_after_change
10094
10095#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10096#undef TARGET_MANGLE_TYPE
10097#define TARGET_MANGLE_TYPE alpha_mangle_type
10098#endif
10099
10100#undef TARGET_LRA_P
10101#define TARGET_LRA_P hook_bool_void_false
10102
10103#undef TARGET_LEGITIMATE_ADDRESS_P
10104#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10105
10106#undef TARGET_CONDITIONAL_REGISTER_USAGE
10107#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10108
10109#undef TARGET_CANONICALIZE_COMPARISON
10110#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10111
10112#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10113#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10114
10115#undef TARGET_HARD_REGNO_MODE_OK
10116#define TARGET_HARD_REGNO_MODE_OK alpha_hard_regno_mode_ok
10117
10118#undef TARGET_MODES_TIEABLE_P
10119#define TARGET_MODES_TIEABLE_P alpha_modes_tieable_p
10120
10121#undef TARGET_CAN_CHANGE_MODE_CLASS
10122#define TARGET_CAN_CHANGE_MODE_CLASS alpha_can_change_mode_class
10123
10124struct gcc_target targetm = TARGET_INITIALIZER;
10125
10126
10127#include "gt-alpha.h"
10128