1/* Subroutines used for code generation on the DEC Alpha.
2   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
3   2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING.  If not, write to
20the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21Boston, MA 02110-1301, USA.  */
22
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "reload.h"
42#include "obstack.h"
43#include "except.h"
44#include "function.h"
45#include "toplev.h"
46#include "ggc.h"
47#include "integrate.h"
48#include "tm_p.h"
49#include "target.h"
50#include "target-def.h"
51#include "debug.h"
52#include "langhooks.h"
53#include <splay-tree.h>
54#include "cfglayout.h"
55#include "tree-gimple.h"
56#include "tree-flow.h"
57#include "tree-stdarg.h"
58
59/* Specify which cpu to schedule for.  */
60enum processor_type alpha_tune;
61
62/* Which cpu we're generating code for.  */
63enum processor_type alpha_cpu;
64
65static const char * const alpha_cpu_name[] =
66{
67  "ev4", "ev5", "ev6"
68};
69
70/* Specify how accurate floating-point traps need to be.  */
71
72enum alpha_trap_precision alpha_tp;
73
74/* Specify the floating-point rounding mode.  */
75
76enum alpha_fp_rounding_mode alpha_fprm;
77
78/* Specify which things cause traps.  */
79
80enum alpha_fp_trap_mode alpha_fptm;
81
82/* Save information from a "cmpxx" operation until the branch or scc is
83   emitted.  */
84
85struct alpha_compare alpha_compare;
86
87/* Nonzero if inside of a function, because the Alpha asm can't
88   handle .files inside of functions.  */
89
90static int inside_function = FALSE;
91
92/* The number of cycles of latency we should assume on memory reads.  */
93
94int alpha_memory_latency = 3;
95
96/* Whether the function needs the GP.  */
97
98static int alpha_function_needs_gp;
99
100/* The alias set for prologue/epilogue register save/restore.  */
101
102static GTY(()) int alpha_sr_alias_set;
103
104/* The assembler name of the current function.  */
105
106static const char *alpha_fnname;
107
108/* The next explicit relocation sequence number.  */
109extern GTY(()) int alpha_next_sequence_number;
110int alpha_next_sequence_number = 1;
111
112/* The literal and gpdisp sequence numbers for this insn, as printed
113   by %# and %* respectively.  */
114extern GTY(()) int alpha_this_literal_sequence_number;
115extern GTY(()) int alpha_this_gpdisp_sequence_number;
116int alpha_this_literal_sequence_number;
117int alpha_this_gpdisp_sequence_number;
118
119/* Costs of various operations on the different architectures.  */
120
121struct alpha_rtx_cost_data
122{
123  unsigned char fp_add;
124  unsigned char fp_mult;
125  unsigned char fp_div_sf;
126  unsigned char fp_div_df;
127  unsigned char int_mult_si;
128  unsigned char int_mult_di;
129  unsigned char int_shift;
130  unsigned char int_cmov;
131  unsigned short int_div;
132};
133
134static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
135{
136  { /* EV4 */
137    COSTS_N_INSNS (6),		/* fp_add */
138    COSTS_N_INSNS (6),		/* fp_mult */
139    COSTS_N_INSNS (34),		/* fp_div_sf */
140    COSTS_N_INSNS (63),		/* fp_div_df */
141    COSTS_N_INSNS (23),		/* int_mult_si */
142    COSTS_N_INSNS (23),		/* int_mult_di */
143    COSTS_N_INSNS (2),		/* int_shift */
144    COSTS_N_INSNS (2),		/* int_cmov */
145    COSTS_N_INSNS (97),		/* int_div */
146  },
147  { /* EV5 */
148    COSTS_N_INSNS (4),		/* fp_add */
149    COSTS_N_INSNS (4),		/* fp_mult */
150    COSTS_N_INSNS (15),		/* fp_div_sf */
151    COSTS_N_INSNS (22),		/* fp_div_df */
152    COSTS_N_INSNS (8),		/* int_mult_si */
153    COSTS_N_INSNS (12),		/* int_mult_di */
154    COSTS_N_INSNS (1) + 1,	/* int_shift */
155    COSTS_N_INSNS (1),		/* int_cmov */
156    COSTS_N_INSNS (83),		/* int_div */
157  },
158  { /* EV6 */
159    COSTS_N_INSNS (4),		/* fp_add */
160    COSTS_N_INSNS (4),		/* fp_mult */
161    COSTS_N_INSNS (12),		/* fp_div_sf */
162    COSTS_N_INSNS (15),		/* fp_div_df */
163    COSTS_N_INSNS (7),		/* int_mult_si */
164    COSTS_N_INSNS (7),		/* int_mult_di */
165    COSTS_N_INSNS (1),		/* int_shift */
166    COSTS_N_INSNS (2),		/* int_cmov */
167    COSTS_N_INSNS (86),		/* int_div */
168  },
169};
170
171/* Similar but tuned for code size instead of execution latency.  The
172   extra +N is fractional cost tuning based on latency.  It's used to
173   encourage use of cheaper insns like shift, but only if there's just
174   one of them.  */
175
176static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
177{
178  COSTS_N_INSNS (1),		/* fp_add */
179  COSTS_N_INSNS (1),		/* fp_mult */
180  COSTS_N_INSNS (1),		/* fp_div_sf */
181  COSTS_N_INSNS (1) + 1,	/* fp_div_df */
182  COSTS_N_INSNS (1) + 1,	/* int_mult_si */
183  COSTS_N_INSNS (1) + 2,	/* int_mult_di */
184  COSTS_N_INSNS (1),		/* int_shift */
185  COSTS_N_INSNS (1),		/* int_cmov */
186  COSTS_N_INSNS (6),		/* int_div */
187};
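
/* One way to read the size table above (illustrative, not additional
   logic): a DImode multiply is COSTS_N_INSNS (1) + 2 while a shift is
   COSTS_N_INSNS (1), so a single "x * 8" is still expanded as one sll,
   but a multi-insn shift/add sequence is never preferred over a single
   multiply insn.  */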
188
189/* Get the number of args of a function in one of two ways.  */
190#if TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK
191#define NUM_ARGS current_function_args_info.num_args
192#else
193#define NUM_ARGS current_function_args_info
194#endif
195
196#define REG_PV 27
197#define REG_RA 26
198
199/* Declarations of static functions.  */
200static struct machine_function *alpha_init_machine_status (void);
201static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
202
203#if TARGET_ABI_OPEN_VMS
204static void alpha_write_linkage (FILE *, const char *, tree);
205#endif
206
207static void unicosmk_output_deferred_case_vectors (FILE *);
208static void unicosmk_gen_dsib (unsigned long *);
209static void unicosmk_output_ssib (FILE *, const char *);
210static int unicosmk_need_dex (rtx);
211
212/* Implement TARGET_HANDLE_OPTION.  */
213
214static bool
215alpha_handle_option (size_t code, const char *arg, int value)
216{
217  switch (code)
218    {
219    case OPT_mfp_regs:
220      if (value == 0)
221	target_flags |= MASK_SOFT_FP;
222      break;
223
224    case OPT_mieee:
225    case OPT_mieee_with_inexact:
226      target_flags |= MASK_IEEE_CONFORMANT;
227      break;
228
229    case OPT_mtls_size_:
230      if (value != 16 && value != 32 && value != 64)
231	error ("bad value %qs for -mtls-size switch", arg);
232      break;
233    }
234
235  return true;
236}
237
238#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
239/* Implement TARGET_MANGLE_FUNDAMENTAL_TYPE.  */
240
241static const char *
242alpha_mangle_fundamental_type (tree type)
243{
244  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
245      && TARGET_LONG_DOUBLE_128)
246    return "g";
247
248  /* For all other types, use normal C++ mangling.  */
249  return NULL;
250}
251#endif
252
253/* Parse target option strings.  */
254
255void
256override_options (void)
257{
258  static const struct cpu_table {
259    const char *const name;
260    const enum processor_type processor;
261    const int flags;
262  } cpu_table[] = {
263    { "ev4",	PROCESSOR_EV4, 0 },
264    { "ev45",	PROCESSOR_EV4, 0 },
265    { "21064",	PROCESSOR_EV4, 0 },
266    { "ev5",	PROCESSOR_EV5, 0 },
267    { "21164",	PROCESSOR_EV5, 0 },
268    { "ev56",	PROCESSOR_EV5, MASK_BWX },
269    { "21164a",	PROCESSOR_EV5, MASK_BWX },
270    { "pca56",	PROCESSOR_EV5, MASK_BWX|MASK_MAX },
271    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
272    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
273    { "ev6",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
274    { "21264",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
275    { "ev67",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
276    { "21264a",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
277    { 0, 0, 0 }
278  };
279
280  int i;
281
282  /* Unicos/Mk doesn't have shared libraries.  */
283  if (TARGET_ABI_UNICOSMK && flag_pic)
284    {
285      warning (0, "-f%s ignored for Unicos/Mk (not supported)",
286	       (flag_pic > 1) ? "PIC" : "pic");
287      flag_pic = 0;
288    }
289
/* On Unicos/Mk, the native compiler consistently generates /d suffixes for
291     floating-point instructions.  Make that the default for this target.  */
292  if (TARGET_ABI_UNICOSMK)
293    alpha_fprm = ALPHA_FPRM_DYN;
294  else
295    alpha_fprm = ALPHA_FPRM_NORM;
296
297  alpha_tp = ALPHA_TP_PROG;
298  alpha_fptm = ALPHA_FPTM_N;
299
300  /* We cannot use su and sui qualifiers for conversion instructions on
301     Unicos/Mk.  I'm not sure if this is due to assembler or hardware
302     limitations.  Right now, we issue a warning if -mieee is specified
303     and then ignore it; eventually, we should either get it right or
304     disable the option altogether.  */
305
306  if (TARGET_IEEE)
307    {
308      if (TARGET_ABI_UNICOSMK)
309	warning (0, "-mieee not supported on Unicos/Mk");
310      else
311	{
312	  alpha_tp = ALPHA_TP_INSN;
313	  alpha_fptm = ALPHA_FPTM_SU;
314	}
315    }
316
317  if (TARGET_IEEE_WITH_INEXACT)
318    {
319      if (TARGET_ABI_UNICOSMK)
320	warning (0, "-mieee-with-inexact not supported on Unicos/Mk");
321      else
322	{
323	  alpha_tp = ALPHA_TP_INSN;
324	  alpha_fptm = ALPHA_FPTM_SUI;
325	}
326    }
327
328  if (alpha_tp_string)
329    {
330      if (! strcmp (alpha_tp_string, "p"))
331	alpha_tp = ALPHA_TP_PROG;
332      else if (! strcmp (alpha_tp_string, "f"))
333	alpha_tp = ALPHA_TP_FUNC;
334      else if (! strcmp (alpha_tp_string, "i"))
335	alpha_tp = ALPHA_TP_INSN;
336      else
337	error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
338    }
339
340  if (alpha_fprm_string)
341    {
342      if (! strcmp (alpha_fprm_string, "n"))
343	alpha_fprm = ALPHA_FPRM_NORM;
344      else if (! strcmp (alpha_fprm_string, "m"))
345	alpha_fprm = ALPHA_FPRM_MINF;
346      else if (! strcmp (alpha_fprm_string, "c"))
347	alpha_fprm = ALPHA_FPRM_CHOP;
348      else if (! strcmp (alpha_fprm_string,"d"))
349	alpha_fprm = ALPHA_FPRM_DYN;
350      else
351	error ("bad value %qs for -mfp-rounding-mode switch",
352	       alpha_fprm_string);
353    }
354
355  if (alpha_fptm_string)
356    {
357      if (strcmp (alpha_fptm_string, "n") == 0)
358	alpha_fptm = ALPHA_FPTM_N;
359      else if (strcmp (alpha_fptm_string, "u") == 0)
360	alpha_fptm = ALPHA_FPTM_U;
361      else if (strcmp (alpha_fptm_string, "su") == 0)
362	alpha_fptm = ALPHA_FPTM_SU;
363      else if (strcmp (alpha_fptm_string, "sui") == 0)
364	alpha_fptm = ALPHA_FPTM_SUI;
365      else
366	error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
367    }
368
369  if (alpha_cpu_string)
370    {
371      for (i = 0; cpu_table [i].name; i++)
372	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
373	  {
374	    alpha_tune = alpha_cpu = cpu_table [i].processor;
375	    target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
376	    target_flags |= cpu_table [i].flags;
377	    break;
378	  }
379      if (! cpu_table [i].name)
380	error ("bad value %qs for -mcpu switch", alpha_cpu_string);
381    }
382
383  if (alpha_tune_string)
384    {
385      for (i = 0; cpu_table [i].name; i++)
386	if (! strcmp (alpha_tune_string, cpu_table [i].name))
387	  {
388	    alpha_tune = cpu_table [i].processor;
389	    break;
390	  }
391      if (! cpu_table [i].name)
392	error ("bad value %qs for -mcpu switch", alpha_tune_string);
393    }
394
395  /* Do some sanity checks on the above options.  */
396
397  if (TARGET_ABI_UNICOSMK && alpha_fptm != ALPHA_FPTM_N)
398    {
399      warning (0, "trap mode not supported on Unicos/Mk");
400      alpha_fptm = ALPHA_FPTM_N;
401    }
402
403  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
404      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
405    {
406      warning (0, "fp software completion requires -mtrap-precision=i");
407      alpha_tp = ALPHA_TP_INSN;
408    }
409
410  if (alpha_cpu == PROCESSOR_EV6)
411    {
412      /* Except for EV6 pass 1 (not released), we always have precise
413	 arithmetic traps.  Which means we can do software completion
414	 without minding trap shadows.  */
415      alpha_tp = ALPHA_TP_PROG;
416    }
417
418  if (TARGET_FLOAT_VAX)
419    {
420      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
421	{
422	  warning (0, "rounding mode not supported for VAX floats");
423	  alpha_fprm = ALPHA_FPRM_NORM;
424	}
425      if (alpha_fptm == ALPHA_FPTM_SUI)
426	{
427	  warning (0, "trap mode not supported for VAX floats");
428	  alpha_fptm = ALPHA_FPTM_SU;
429	}
430      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
431	warning (0, "128-bit long double not supported for VAX floats");
432      target_flags &= ~MASK_LONG_DOUBLE_128;
433    }
434
435  {
436    char *end;
437    int lat;
438
439    if (!alpha_mlat_string)
440      alpha_mlat_string = "L1";
441
442    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
443	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
444      ;
445    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
446	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
447	     && alpha_mlat_string[2] == '\0')
448      {
449	static int const cache_latency[][4] =
450	{
451	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
452	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
453	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
454	};
455
456	lat = alpha_mlat_string[1] - '0';
457	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
458	  {
459	    warning (0, "L%d cache latency unknown for %s",
460		     lat, alpha_cpu_name[alpha_tune]);
461	    lat = 3;
462	  }
463	else
464	  lat = cache_latency[alpha_tune][lat-1];
465      }
466    else if (! strcmp (alpha_mlat_string, "main"))
467      {
468	/* Most current memories have about 370ns latency.  This is
469	   a reasonable guess for a fast cpu.  */
470	lat = 150;
471      }
472    else
473      {
474	warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
475	lat = 3;
476      }
477
478    alpha_memory_latency = lat;
479  }
480
481  /* Default the definition of "small data" to 8 bytes.  */
482  if (!g_switch_set)
483    g_switch_value = 8;
484
485  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
486  if (flag_pic == 1)
487    target_flags |= MASK_SMALL_DATA;
488  else if (flag_pic == 2)
489    target_flags &= ~MASK_SMALL_DATA;
490
491  /* Align labels and loops for optimal branching.  */
492  /* ??? Kludge these by not doing anything if we don't optimize and also if
493     we are writing ECOFF symbols to work around a bug in DEC's assembler.  */
494  if (optimize > 0 && write_symbols != SDB_DEBUG)
495    {
496      if (align_loops <= 0)
497	align_loops = 16;
498      if (align_jumps <= 0)
499	align_jumps = 16;
500    }
501  if (align_functions <= 0)
502    align_functions = 16;
503
504  /* Acquire a unique set number for our register saves and restores.  */
505  alpha_sr_alias_set = new_alias_set ();
506
507  /* Register variables and functions with the garbage collector.  */
508
509  /* Set up function hooks.  */
510  init_machine_status = alpha_init_machine_status;
511
512  /* Tell the compiler when we're using VAX floating point.  */
513  if (TARGET_FLOAT_VAX)
514    {
515      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
516      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
517      REAL_MODE_FORMAT (TFmode) = NULL;
518    }
519}
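
/* Worked example of the option handling above (illustrative only):
   "-mcpu=ev56" matches the { "ev56", PROCESSOR_EV5, MASK_BWX } entry,
   so code is scheduled for EV5 and the BWX byte/word extension is
   enabled; on that target "-mmemory-latency=L2" would then set
   alpha_memory_latency to 12 via the cache_latency table.  */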
520
521/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */
522
523int
524zap_mask (HOST_WIDE_INT value)
525{
526  int i;
527
528  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
529       i++, value >>= 8)
530    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
531      return 0;
532
533  return 1;
534}
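
/* For example, 0x00000000ffff00ff is a zap mask (every one of its eight
   bytes is 0x00 or 0xff), whereas 0x0000000000000180 is not, since its
   low byte is 0x80.  Illustrative examples for the predicate above.  */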
535
536/* Return true if OP is valid for a particular TLS relocation.
537   We are already guaranteed that OP is a CONST.  */
538
539int
540tls_symbolic_operand_1 (rtx op, int size, int unspec)
541{
542  op = XEXP (op, 0);
543
544  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
545    return 0;
546  op = XVECEXP (op, 0, 0);
547
548  if (GET_CODE (op) != SYMBOL_REF)
549    return 0;
550
551  switch (SYMBOL_REF_TLS_MODEL (op))
552    {
553    case TLS_MODEL_LOCAL_DYNAMIC:
554      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
555    case TLS_MODEL_INITIAL_EXEC:
556      return unspec == UNSPEC_TPREL && size == 64;
557    case TLS_MODEL_LOCAL_EXEC:
558      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
559    default:
560      gcc_unreachable ();
561    }
562}
563
564/* Used by aligned_memory_operand and unaligned_memory_operand to
565   resolve what reload is going to do with OP if it's a register.  */
566
567rtx
568resolve_reload_operand (rtx op)
569{
570  if (reload_in_progress)
571    {
572      rtx tmp = op;
573      if (GET_CODE (tmp) == SUBREG)
574	tmp = SUBREG_REG (tmp);
575      if (GET_CODE (tmp) == REG
576	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
577	{
578	  op = reg_equiv_memory_loc[REGNO (tmp)];
579	  if (op == 0)
580	    return 0;
581	}
582    }
583  return op;
584}
585
586/* Implements CONST_OK_FOR_LETTER_P.  Return true if the value matches
587   the range defined for C in [I-P].  */
588
589bool
590alpha_const_ok_for_letter_p (HOST_WIDE_INT value, int c)
591{
592  switch (c)
593    {
594    case 'I':
595      /* An unsigned 8 bit constant.  */
596      return (unsigned HOST_WIDE_INT) value < 0x100;
597    case 'J':
598      /* The constant zero.  */
599      return value == 0;
600    case 'K':
601      /* A signed 16 bit constant.  */
602      return (unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000;
603    case 'L':
604      /* A shifted signed 16 bit constant appropriate for LDAH.  */
605      return ((value & 0xffff) == 0
606              && ((value) >> 31 == -1 || value >> 31 == 0));
607    case 'M':
608      /* A constant that can be AND'ed with using a ZAP insn.  */
609      return zap_mask (value);
610    case 'N':
611      /* A complemented unsigned 8 bit constant.  */
612      return (unsigned HOST_WIDE_INT) (~ value) < 0x100;
613    case 'O':
614      /* A negated unsigned 8 bit constant.  */
615      return (unsigned HOST_WIDE_INT) (- value) < 0x100;
616    case 'P':
617      /* The constant 1, 2 or 3.  */
618      return value == 1 || value == 2 || value == 3;
619
620    default:
621      return false;
622    }
623}
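
/* Worked examples for the constraint letters above (illustrative):
   0x7fff0000 satisfies 'L' (low 16 bits zero, high part fits LDAH);
   0xffffffffffffff80 satisfies 'N' (~value == 0x7f < 0x100); and
   0x12345678 satisfies none of I-P, so it needs a multi-insn load.  */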
624
625/* Implements CONST_DOUBLE_OK_FOR_LETTER_P.  Return true if VALUE
626   matches for C in [GH].  */
627
628bool
629alpha_const_double_ok_for_letter_p (rtx value, int c)
630{
631  switch (c)
632    {
633    case 'G':
634      /* The floating point zero constant.  */
635      return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
636	      && value == CONST0_RTX (GET_MODE (value)));
637
638    case 'H':
639      /* A valid operand of a ZAP insn.  */
640      return (GET_MODE (value) == VOIDmode
641	      && zap_mask (CONST_DOUBLE_LOW (value))
642	      && zap_mask (CONST_DOUBLE_HIGH (value)));
643
644    default:
645      return false;
646    }
647}
648
/* Implements EXTRA_CONSTRAINT.  Return true if VALUE matches for C.  */
651
652bool
653alpha_extra_constraint (rtx value, int c)
654{
655  switch (c)
656    {
657    case 'Q':
658      return normal_memory_operand (value, VOIDmode);
659    case 'R':
660      return direct_call_operand (value, Pmode);
661    case 'S':
662      return (GET_CODE (value) == CONST_INT
663	      && (unsigned HOST_WIDE_INT) INTVAL (value) < 64);
664    case 'T':
665      return GET_CODE (value) == HIGH;
666    case 'U':
667      return TARGET_ABI_UNICOSMK && symbolic_operand (value, VOIDmode);
668    case 'W':
669      return (GET_CODE (value) == CONST_VECTOR
670	      && value == CONST0_RTX (GET_MODE (value)));
671    default:
672      return false;
673    }
674}
675
/* The set of scalar modes supported differs from the default
   check-what-c-supports version in that sometimes TFmode is available
   even when long double indicates only DFmode.  On unicosmk, we have
   the situation that HImode doesn't map to any C type, but of course we
   still support that.  */
680
681static bool
682alpha_scalar_mode_supported_p (enum machine_mode mode)
683{
684  switch (mode)
685    {
686    case QImode:
687    case HImode:
688    case SImode:
689    case DImode:
690    case TImode: /* via optabs.c */
691      return true;
692
693    case SFmode:
694    case DFmode:
695      return true;
696
697    case TFmode:
698      return TARGET_HAS_XFLOATING_LIBS;
699
700    default:
701      return false;
702    }
703}
704
705/* Alpha implements a couple of integer vector mode operations when
706   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
707   which allows the vectorizer to operate on e.g. move instructions,
708   or when expand_vector_operations can do something useful.  */
709
710static bool
711alpha_vector_mode_supported_p (enum machine_mode mode)
712{
713  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
714}
715
716/* Return 1 if this function can directly return via $26.  */
717
718int
719direct_return (void)
720{
721  return (! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK
722	  && reload_completed
723	  && alpha_sa_size () == 0
724	  && get_frame_size () == 0
725	  && current_function_outgoing_args_size == 0
726	  && current_function_pretend_args_size == 0);
727}
728
729/* Return the ADDR_VEC associated with a tablejump insn.  */
730
731rtx
732alpha_tablejump_addr_vec (rtx insn)
733{
734  rtx tmp;
735
736  tmp = JUMP_LABEL (insn);
737  if (!tmp)
738    return NULL_RTX;
739  tmp = NEXT_INSN (tmp);
740  if (!tmp)
741    return NULL_RTX;
742  if (GET_CODE (tmp) == JUMP_INSN
743      && GET_CODE (PATTERN (tmp)) == ADDR_DIFF_VEC)
744    return PATTERN (tmp);
745  return NULL_RTX;
746}
747
748/* Return the label of the predicted edge, or CONST0_RTX if we don't know.  */
749
750rtx
751alpha_tablejump_best_label (rtx insn)
752{
753  rtx jump_table = alpha_tablejump_addr_vec (insn);
754  rtx best_label = NULL_RTX;
755
756  /* ??? Once the CFG doesn't keep getting completely rebuilt, look
757     there for edge frequency counts from profile data.  */
758
759  if (jump_table)
760    {
761      int n_labels = XVECLEN (jump_table, 1);
762      int best_count = -1;
763      int i, j;
764
765      for (i = 0; i < n_labels; i++)
766	{
767	  int count = 1;
768
769	  for (j = i + 1; j < n_labels; j++)
770	    if (XEXP (XVECEXP (jump_table, 1, i), 0)
771		== XEXP (XVECEXP (jump_table, 1, j), 0))
772	      count++;
773
774	  if (count > best_count)
775	    best_count = count, best_label = XVECEXP (jump_table, 1, i);
776	}
777    }
778
779  return best_label ? best_label : const0_rtx;
780}
781
782/* Return the TLS model to use for SYMBOL.  */
783
784static enum tls_model
785tls_symbolic_operand_type (rtx symbol)
786{
787  enum tls_model model;
788
789  if (GET_CODE (symbol) != SYMBOL_REF)
790    return 0;
791  model = SYMBOL_REF_TLS_MODEL (symbol);
792
793  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
794  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
795    model = TLS_MODEL_INITIAL_EXEC;
796
797  return model;
798}
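
/* For instance, with the usual -mtls-size=32 a local-exec symbol is
   addressed through a 32-bit TPREL high/low pair, but once the user
   asks for -mtls-size=64 the code emitted for local-exec is the same
   as for initial-exec, which is why the model is collapsed above.
   (Illustrative note only.)  */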
799
800/* Return true if the function DECL will share the same GP as any
801   function in the current unit of translation.  */
802
803static bool
804decl_has_samegp (tree decl)
805{
806  /* Functions that are not local can be overridden, and thus may
807     not share the same gp.  */
808  if (!(*targetm.binds_local_p) (decl))
809    return false;
810
811  /* If -msmall-data is in effect, assume that there is only one GP
812     for the module, and so any local symbol has this property.  We
813     need explicit relocations to be able to enforce this for symbols
814     not defined in this unit of translation, however.  */
815  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
816    return true;
817
818  /* Functions that are not external are defined in this UoT.  */
819  /* ??? Irritatingly, static functions not yet emitted are still
820     marked "external".  Apply this to non-static functions only.  */
821  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
822}
823
824/* Return true if EXP should be placed in the small data section.  */
825
826static bool
827alpha_in_small_data_p (tree exp)
828{
829  /* We want to merge strings, so we never consider them small data.  */
830  if (TREE_CODE (exp) == STRING_CST)
831    return false;
832
833  /* Functions are never in the small data area.  Duh.  */
834  if (TREE_CODE (exp) == FUNCTION_DECL)
835    return false;
836
837  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
838    {
839      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
840      if (strcmp (section, ".sdata") == 0
841	  || strcmp (section, ".sbss") == 0)
842	return true;
843    }
844  else
845    {
846      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
847
848      /* If this is an incomplete type with size 0, then we can't put it
849	 in sdata because it might be too big when completed.  */
850      if (size > 0 && (unsigned HOST_WIDE_INT) size <= g_switch_value)
851	return true;
852    }
853
854  return false;
855}
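
/* Example (illustrative): with the default -G 8, a file-scope
   "static int counter;" (4 bytes) is placed in the small data area and
   addressed with a 16-bit displacement from the GP, while an 80-byte
   array is not, unless its declaration explicitly names the .sdata or
   .sbss section.  */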
856
857#if TARGET_ABI_OPEN_VMS
858static bool
859alpha_linkage_symbol_p (const char *symname)
860{
861  int symlen = strlen (symname);
862
863  if (symlen > 4)
864    return strcmp (&symname [symlen - 4], "..lk") == 0;
865
866  return false;
867}
868
869#define LINKAGE_SYMBOL_REF_P(X) \
870  ((GET_CODE (X) == SYMBOL_REF   \
871    && alpha_linkage_symbol_p (XSTR (X, 0))) \
872   || (GET_CODE (X) == CONST                 \
873       && GET_CODE (XEXP (X, 0)) == PLUS     \
874       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
875       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
876#endif
877
878/* legitimate_address_p recognizes an RTL expression that is a valid
879   memory address for an instruction.  The MODE argument is the
880   machine mode for the MEM expression that wants to use this address.
881
882   For Alpha, we have either a constant address or the sum of a
883   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
885   low-order three bits; this is an "unaligned" access.  */
886
887bool
888alpha_legitimate_address_p (enum machine_mode mode, rtx x, int strict)
889{
890  /* If this is an ldq_u type address, discard the outer AND.  */
891  if (mode == DImode
892      && GET_CODE (x) == AND
893      && GET_CODE (XEXP (x, 1)) == CONST_INT
894      && INTVAL (XEXP (x, 1)) == -8)
895    x = XEXP (x, 0);
896
897  /* Discard non-paradoxical subregs.  */
898  if (GET_CODE (x) == SUBREG
899      && (GET_MODE_SIZE (GET_MODE (x))
900	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
901    x = SUBREG_REG (x);
902
903  /* Unadorned general registers are valid.  */
904  if (REG_P (x)
905      && (strict
906	  ? STRICT_REG_OK_FOR_BASE_P (x)
907	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
908    return true;
909
910  /* Constant addresses (i.e. +/- 32k) are valid.  */
911  if (CONSTANT_ADDRESS_P (x))
912    return true;
913
914#if TARGET_ABI_OPEN_VMS
915  if (LINKAGE_SYMBOL_REF_P (x))
916    return true;
917#endif
918
919  /* Register plus a small constant offset is valid.  */
920  if (GET_CODE (x) == PLUS)
921    {
922      rtx ofs = XEXP (x, 1);
923      x = XEXP (x, 0);
924
925      /* Discard non-paradoxical subregs.  */
926      if (GET_CODE (x) == SUBREG
927          && (GET_MODE_SIZE (GET_MODE (x))
928	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
929	x = SUBREG_REG (x);
930
931      if (REG_P (x))
932	{
933	  if (! strict
934	      && NONSTRICT_REG_OK_FP_BASE_P (x)
935	      && GET_CODE (ofs) == CONST_INT)
936	    return true;
937	  if ((strict
938	       ? STRICT_REG_OK_FOR_BASE_P (x)
939	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
940	      && CONSTANT_ADDRESS_P (ofs))
941	    return true;
942	}
943    }
944
945  /* If we're managing explicit relocations, LO_SUM is valid, as
946     are small data symbols.  */
947  else if (TARGET_EXPLICIT_RELOCS)
948    {
949      if (small_symbolic_operand (x, Pmode))
950	return true;
951
952      if (GET_CODE (x) == LO_SUM)
953	{
954	  rtx ofs = XEXP (x, 1);
955	  x = XEXP (x, 0);
956
957	  /* Discard non-paradoxical subregs.  */
958	  if (GET_CODE (x) == SUBREG
959	      && (GET_MODE_SIZE (GET_MODE (x))
960		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
961	    x = SUBREG_REG (x);
962
963	  /* Must have a valid base register.  */
964	  if (! (REG_P (x)
965		 && (strict
966		     ? STRICT_REG_OK_FOR_BASE_P (x)
967		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
968	    return false;
969
970	  /* The symbol must be local.  */
971	  if (local_symbolic_operand (ofs, Pmode)
972	      || dtp32_symbolic_operand (ofs, Pmode)
973	      || tp32_symbolic_operand (ofs, Pmode))
974	    return true;
975	}
976    }
977
978  return false;
979}
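
/* Some RTL accepted by the predicate above (illustrative):
     (reg:DI 16)
     (plus:DI (reg:DI 16) (const_int 32760))
     (and:DI (plus:DI (reg:DI 16) (const_int 5)) (const_int -8))
   the last being the ldq_u-style unaligned form; by contrast
   (plus:DI (reg:DI 16) (const_int 65536)) is rejected and has to be
   legitimized first.  */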
980
981/* Build the SYMBOL_REF for __tls_get_addr.  */
982
983static GTY(()) rtx tls_get_addr_libfunc;
984
985static rtx
986get_tls_get_addr (void)
987{
988  if (!tls_get_addr_libfunc)
989    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
990  return tls_get_addr_libfunc;
991}
992
993/* Try machine-dependent ways of modifying an illegitimate address
994   to be legitimate.  If we find one, return the new, valid address.  */
995
996rtx
997alpha_legitimize_address (rtx x, rtx scratch,
998			  enum machine_mode mode ATTRIBUTE_UNUSED)
999{
1000  HOST_WIDE_INT addend;
1001
1002  /* If the address is (plus reg const_int) and the CONST_INT is not a
1003     valid offset, compute the high part of the constant and add it to
1004     the register.  Then our address is (plus temp low-part-const).  */
1005  if (GET_CODE (x) == PLUS
1006      && GET_CODE (XEXP (x, 0)) == REG
1007      && GET_CODE (XEXP (x, 1)) == CONST_INT
1008      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
1009    {
1010      addend = INTVAL (XEXP (x, 1));
1011      x = XEXP (x, 0);
1012      goto split_addend;
1013    }
1014
1015  /* If the address is (const (plus FOO const_int)), find the low-order
1016     part of the CONST_INT.  Then load FOO plus any high-order part of the
1017     CONST_INT into a register.  Our address is (plus reg low-part-const).
1018     This is done to reduce the number of GOT entries.  */
1019  if (!no_new_pseudos
1020      && GET_CODE (x) == CONST
1021      && GET_CODE (XEXP (x, 0)) == PLUS
1022      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
1023    {
1024      addend = INTVAL (XEXP (XEXP (x, 0), 1));
1025      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
1026      goto split_addend;
1027    }
1028
1029  /* If we have a (plus reg const), emit the load as in (2), then add
1030     the two registers, and finally generate (plus reg low-part-const) as
1031     our address.  */
1032  if (!no_new_pseudos
1033      && GET_CODE (x) == PLUS
1034      && GET_CODE (XEXP (x, 0)) == REG
1035      && GET_CODE (XEXP (x, 1)) == CONST
1036      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1037      && GET_CODE (XEXP (XEXP (XEXP (x, 1), 0), 1)) == CONST_INT)
1038    {
1039      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1040      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1041			       XEXP (XEXP (XEXP (x, 1), 0), 0),
1042			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
1043      goto split_addend;
1044    }
1045
1046  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.  */
1047  if (TARGET_EXPLICIT_RELOCS && symbolic_operand (x, Pmode))
1048    {
1049      rtx r0, r16, eqv, tga, tp, insn, dest, seq;
1050
1051      switch (tls_symbolic_operand_type (x))
1052	{
1053	case TLS_MODEL_NONE:
1054	  break;
1055
1056	case TLS_MODEL_GLOBAL_DYNAMIC:
1057	  start_sequence ();
1058
1059	  r0 = gen_rtx_REG (Pmode, 0);
1060	  r16 = gen_rtx_REG (Pmode, 16);
1061	  tga = get_tls_get_addr ();
1062	  dest = gen_reg_rtx (Pmode);
1063	  seq = GEN_INT (alpha_next_sequence_number++);
1064
1065	  emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1066	  insn = gen_call_value_osf_tlsgd (r0, tga, seq);
1067	  insn = emit_call_insn (insn);
1068	  CONST_OR_PURE_CALL_P (insn) = 1;
1069	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1070
1071          insn = get_insns ();
1072	  end_sequence ();
1073
1074	  emit_libcall_block (insn, dest, r0, x);
1075	  return dest;
1076
1077	case TLS_MODEL_LOCAL_DYNAMIC:
1078	  start_sequence ();
1079
1080	  r0 = gen_rtx_REG (Pmode, 0);
1081	  r16 = gen_rtx_REG (Pmode, 16);
1082	  tga = get_tls_get_addr ();
1083	  scratch = gen_reg_rtx (Pmode);
1084	  seq = GEN_INT (alpha_next_sequence_number++);
1085
1086	  emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1087	  insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1088	  insn = emit_call_insn (insn);
1089	  CONST_OR_PURE_CALL_P (insn) = 1;
1090	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1091
1092          insn = get_insns ();
1093	  end_sequence ();
1094
1095	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1096				UNSPEC_TLSLDM_CALL);
1097	  emit_libcall_block (insn, scratch, r0, eqv);
1098
1099	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1100	  eqv = gen_rtx_CONST (Pmode, eqv);
1101
1102	  if (alpha_tls_size == 64)
1103	    {
1104	      dest = gen_reg_rtx (Pmode);
1105	      emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1106	      emit_insn (gen_adddi3 (dest, dest, scratch));
1107	      return dest;
1108	    }
1109	  if (alpha_tls_size == 32)
1110	    {
1111	      insn = gen_rtx_HIGH (Pmode, eqv);
1112	      insn = gen_rtx_PLUS (Pmode, scratch, insn);
1113	      scratch = gen_reg_rtx (Pmode);
1114	      emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1115	    }
1116	  return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1117
1118	case TLS_MODEL_INITIAL_EXEC:
1119	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1120	  eqv = gen_rtx_CONST (Pmode, eqv);
1121	  tp = gen_reg_rtx (Pmode);
1122	  scratch = gen_reg_rtx (Pmode);
1123	  dest = gen_reg_rtx (Pmode);
1124
1125	  emit_insn (gen_load_tp (tp));
1126	  emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1127	  emit_insn (gen_adddi3 (dest, tp, scratch));
1128	  return dest;
1129
1130	case TLS_MODEL_LOCAL_EXEC:
1131	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1132	  eqv = gen_rtx_CONST (Pmode, eqv);
1133	  tp = gen_reg_rtx (Pmode);
1134
1135	  emit_insn (gen_load_tp (tp));
1136	  if (alpha_tls_size == 32)
1137	    {
1138	      insn = gen_rtx_HIGH (Pmode, eqv);
1139	      insn = gen_rtx_PLUS (Pmode, tp, insn);
1140	      tp = gen_reg_rtx (Pmode);
1141	      emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1142	    }
1143	  return gen_rtx_LO_SUM (Pmode, tp, eqv);
1144
1145	default:
1146	  gcc_unreachable ();
1147	}
1148
1149      if (local_symbolic_operand (x, Pmode))
1150	{
1151	  if (small_symbolic_operand (x, Pmode))
1152	    return x;
1153	  else
1154	    {
1155	      if (!no_new_pseudos)
1156	        scratch = gen_reg_rtx (Pmode);
1157	      emit_insn (gen_rtx_SET (VOIDmode, scratch,
1158				      gen_rtx_HIGH (Pmode, x)));
1159	      return gen_rtx_LO_SUM (Pmode, scratch, x);
1160	    }
1161	}
1162    }
1163
1164  return NULL;
1165
1166 split_addend:
1167  {
1168    HOST_WIDE_INT low, high;
1169
1170    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1171    addend -= low;
1172    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1173    addend -= high;
1174
1175    if (addend)
1176      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1177			       (no_new_pseudos ? scratch : NULL_RTX),
1178			       1, OPTAB_LIB_WIDEN);
1179    if (high)
1180      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1181			       (no_new_pseudos ? scratch : NULL_RTX),
1182			       1, OPTAB_LIB_WIDEN);
1183
1184    return plus_constant (x, low);
1185  }
1186}
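
/* split_addend example (illustrative): for (plus (reg) (const_int
   0x29000)), low = ((0x9000 ^ 0x8000) - 0x8000) = -0x7000 and high =
   0x30000, so we emit roughly

	ldah	tmp,3(reg)		# reg + 0x30000
	...	-28672(tmp)		# low part left in the mem

   both pieces fitting the 16-bit LDA/LDAH immediate fields.  */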
1187
1188/* Primarily this is required for TLS symbols, but given that our move
1189   patterns *ought* to be able to handle any symbol at any time, we
1190   should never be spilling symbolic operands to the constant pool, ever.  */
1191
1192static bool
1193alpha_cannot_force_const_mem (rtx x)
1194{
1195  enum rtx_code code = GET_CODE (x);
1196  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1197}
1198
1199/* We do not allow indirect calls to be optimized into sibling calls, nor
1200   can we allow a call to a function with a different GP to be optimized
1201   into a sibcall.  */
1202
1203static bool
1204alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1205{
1206  /* Can't do indirect tail calls, since we don't know if the target
1207     uses the same GP.  */
1208  if (!decl)
1209    return false;
1210
1211  /* Otherwise, we can make a tail call if the target function shares
1212     the same GP.  */
1213  return decl_has_samegp (decl);
1214}
1215
1216int
1217some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1218{
1219  rtx x = *px;
1220
1221  /* Don't re-split.  */
1222  if (GET_CODE (x) == LO_SUM)
1223    return -1;
1224
1225  return small_symbolic_operand (x, Pmode) != 0;
1226}
1227
1228static int
1229split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1230{
1231  rtx x = *px;
1232
1233  /* Don't re-split.  */
1234  if (GET_CODE (x) == LO_SUM)
1235    return -1;
1236
1237  if (small_symbolic_operand (x, Pmode))
1238    {
1239      x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1240      *px = x;
1241      return -1;
1242    }
1243
1244  return 0;
1245}
1246
1247rtx
1248split_small_symbolic_operand (rtx x)
1249{
1250  x = copy_insn (x);
1251  for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1252  return x;
1253}
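
/* Illustrative transformation performed above: a reference to the
   small-data symbol "x" such as (mem (symbol_ref "x")) becomes
   (mem (lo_sum (reg 29) (symbol_ref "x"))), i.e. a 16-bit GP-relative
   access, without emitting any new insns.  */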
1254
1255/* Indicate that INSN cannot be duplicated.  This is true for any insn
1256   that we've marked with gpdisp relocs, since those have to stay in
1257   1-1 correspondence with one another.
1258
1259   Technically we could copy them if we could set up a mapping from one
1260   sequence number to another, across the set of insns to be duplicated.
1261   This seems overly complicated and error-prone since interblock motion
1262   from sched-ebb could move one of the pair of insns to a different block.
1263
1264   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
1265   then they'll be in a different block from their ldgp.  Which could lead
1266   the bb reorder code to think that it would be ok to copy just the block
1267   containing the call and branch to the block containing the ldgp.  */
1268
1269static bool
1270alpha_cannot_copy_insn_p (rtx insn)
1271{
1272  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1273    return false;
1274  if (recog_memoized (insn) >= 0)
1275    return get_attr_cannot_copy (insn);
1276  else
1277    return false;
1278}
1279
1280
1281/* Try a machine-dependent way of reloading an illegitimate address
1282   operand.  If we find one, push the reload and return the new rtx.  */
1283
1284rtx
1285alpha_legitimize_reload_address (rtx x,
1286				 enum machine_mode mode ATTRIBUTE_UNUSED,
1287				 int opnum, int type,
1288				 int ind_levels ATTRIBUTE_UNUSED)
1289{
1290  /* We must recognize output that we have already generated ourselves.  */
1291  if (GET_CODE (x) == PLUS
1292      && GET_CODE (XEXP (x, 0)) == PLUS
1293      && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
1294      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1295      && GET_CODE (XEXP (x, 1)) == CONST_INT)
1296    {
1297      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1298		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1299		   opnum, type);
1300      return x;
1301    }
1302
1303  /* We wish to handle large displacements off a base register by
1304     splitting the addend across an ldah and the mem insn.  This
     cuts the number of extra insns needed from 3 to 1.  */
1306  if (GET_CODE (x) == PLUS
1307      && GET_CODE (XEXP (x, 0)) == REG
1308      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1309      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1310      && GET_CODE (XEXP (x, 1)) == CONST_INT)
1311    {
1312      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1313      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1314      HOST_WIDE_INT high
1315	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1316
1317      /* Check for 32-bit overflow.  */
1318      if (high + low != val)
1319	return NULL_RTX;
1320
1321      /* Reload the high part into a base reg; leave the low part
1322	 in the mem directly.  */
1323      x = gen_rtx_PLUS (GET_MODE (x),
1324			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1325				      GEN_INT (high)),
1326			GEN_INT (low));
1327
1328      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1329		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1330		   opnum, type);
1331      return x;
1332    }
1333
1334  return NULL_RTX;
1335}
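
/* Worked example of the splitting above (illustrative): reloading
   (plus (reg 30) (const_int 0x12340)) gives low = 0x2340 and high =
   0x10000, i.e.
     (plus (plus (reg 30) (const_int 65536)) (const_int 9024))
   where the inner PLUS is pushed as a reload (one ldah into a base
   register) and the 16-bit low displacement stays in the mem.  */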
1336
1337/* Compute a (partial) cost for rtx X.  Return true if the complete
1338   cost has been computed, and false if subexpressions should be
1339   scanned.  In either case, *TOTAL contains the cost result.  */
1340
1341static bool
1342alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
1343{
1344  enum machine_mode mode = GET_MODE (x);
1345  bool float_mode_p = FLOAT_MODE_P (mode);
1346  const struct alpha_rtx_cost_data *cost_data;
1347
1348  if (optimize_size)
1349    cost_data = &alpha_rtx_cost_size;
1350  else
1351    cost_data = &alpha_rtx_cost_data[alpha_tune];
1352
1353  switch (code)
1354    {
1355    case CONST_INT:
1356      /* If this is an 8-bit constant, return zero since it can be used
1357	 nearly anywhere with no cost.  If it is a valid operand for an
1358	 ADD or AND, likewise return 0 if we know it will be used in that
1359	 context.  Otherwise, return 2 since it might be used there later.
1360	 All other constants take at least two insns.  */
1361      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1362	{
1363	  *total = 0;
1364	  return true;
1365	}
1366      /* FALLTHRU */
1367
1368    case CONST_DOUBLE:
1369      if (x == CONST0_RTX (mode))
1370	*total = 0;
1371      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1372	       || (outer_code == AND && and_operand (x, VOIDmode)))
1373	*total = 0;
1374      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1375	*total = 2;
1376      else
1377	*total = COSTS_N_INSNS (2);
1378      return true;
1379
1380    case CONST:
1381    case SYMBOL_REF:
1382    case LABEL_REF:
1383      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1384	*total = COSTS_N_INSNS (outer_code != MEM);
1385      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1386	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
1387      else if (tls_symbolic_operand_type (x))
1388	/* Estimate of cost for call_pal rduniq.  */
1389	/* ??? How many insns do we emit here?  More than one...  */
1390	*total = COSTS_N_INSNS (15);
1391      else
1392	/* Otherwise we do a load from the GOT.  */
1393	*total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
1394      return true;
1395
1396    case HIGH:
1397      /* This is effectively an add_operand.  */
1398      *total = 2;
1399      return true;
1400
1401    case PLUS:
1402    case MINUS:
1403      if (float_mode_p)
1404	*total = cost_data->fp_add;
1405      else if (GET_CODE (XEXP (x, 0)) == MULT
1406	       && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1407	{
1408	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
1409		    + rtx_cost (XEXP (x, 1), outer_code) + COSTS_N_INSNS (1));
1410	  return true;
1411	}
1412      return false;
1413
1414    case MULT:
1415      if (float_mode_p)
1416	*total = cost_data->fp_mult;
1417      else if (mode == DImode)
1418	*total = cost_data->int_mult_di;
1419      else
1420	*total = cost_data->int_mult_si;
1421      return false;
1422
1423    case ASHIFT:
1424      if (GET_CODE (XEXP (x, 1)) == CONST_INT
1425	  && INTVAL (XEXP (x, 1)) <= 3)
1426	{
1427	  *total = COSTS_N_INSNS (1);
1428	  return false;
1429	}
1430      /* FALLTHRU */
1431
1432    case ASHIFTRT:
1433    case LSHIFTRT:
1434      *total = cost_data->int_shift;
1435      return false;
1436
1437    case IF_THEN_ELSE:
1438      if (float_mode_p)
1439        *total = cost_data->fp_add;
1440      else
1441        *total = cost_data->int_cmov;
1442      return false;
1443
1444    case DIV:
1445    case UDIV:
1446    case MOD:
1447    case UMOD:
1448      if (!float_mode_p)
1449	*total = cost_data->int_div;
1450      else if (mode == SFmode)
1451        *total = cost_data->fp_div_sf;
1452      else
1453        *total = cost_data->fp_div_df;
1454      return false;
1455
1456    case MEM:
1457      *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
1458      return true;
1459
1460    case NEG:
1461      if (! float_mode_p)
1462	{
1463	  *total = COSTS_N_INSNS (1);
1464	  return false;
1465	}
1466      /* FALLTHRU */
1467
1468    case ABS:
1469      if (! float_mode_p)
1470	{
1471	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1472	  return false;
1473	}
1474      /* FALLTHRU */
1475
1476    case FLOAT:
1477    case UNSIGNED_FLOAT:
1478    case FIX:
1479    case UNSIGNED_FIX:
1480    case FLOAT_TRUNCATE:
1481      *total = cost_data->fp_add;
1482      return false;
1483
1484    case FLOAT_EXTEND:
1485      if (GET_CODE (XEXP (x, 0)) == MEM)
1486	*total = 0;
1487      else
1488	*total = cost_data->fp_add;
1489      return false;
1490
1491    default:
1492      return false;
1493    }
1494}
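
/* Cost example (illustrative): (plus (mult (reg) (const_int 8)) (reg))
   is priced as its operands plus COSTS_N_INSNS (1), matching the single
   s8addq scaled-add instruction, rather than as a separate multiply
   followed by an add.  */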
1495
1496/* REF is an alignable memory location.  Place an aligned SImode
1497   reference into *PALIGNED_MEM and the number of bits to shift into
1498   *PBITNUM.  SCRATCH is a free register for use in reloading out
1499   of range stack slots.  */
1500
1501void
1502get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1503{
1504  rtx base;
1505  HOST_WIDE_INT disp, offset;
1506
1507  gcc_assert (GET_CODE (ref) == MEM);
1508
1509  if (reload_in_progress
1510      && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1511    {
1512      base = find_replacement (&XEXP (ref, 0));
1513      gcc_assert (memory_address_p (GET_MODE (ref), base));
1514    }
1515  else
1516    base = XEXP (ref, 0);
1517
1518  if (GET_CODE (base) == PLUS)
1519    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1520  else
1521    disp = 0;
1522
1523  /* Find the byte offset within an aligned word.  If the memory itself is
1524     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
1525     will have examined the base register and determined it is aligned, and
1526     thus displacements from it are naturally alignable.  */
1527  if (MEM_ALIGN (ref) >= 32)
1528    offset = 0;
1529  else
1530    offset = disp & 3;
1531
1532  /* Access the entire aligned word.  */
1533  *paligned_mem = widen_memory_access (ref, SImode, -offset);
1534
1535  /* Convert the byte offset within the word to a bit offset.  */
1536  if (WORDS_BIG_ENDIAN)
1537    offset = 32 - (GET_MODE_BITSIZE (GET_MODE (ref)) + offset * 8);
1538  else
1539    offset *= 8;
1540  *pbitnum = GEN_INT (offset);
1541}
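
/* Worked example (illustrative): for a little-endian HImode reference
   at 6(base) where only the base register is known to be aligned,
   OFFSET is 2, *PALIGNED_MEM becomes the SImode word at 4(base), and
   *PBITNUM is 16 -- the halfword lives at bit 16 of that word.  */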
1542
/* Similar, but just get the address.  Handle the two reload cases.  */
1545
1546rtx
1547get_unaligned_address (rtx ref)
1548{
1549  rtx base;
1550  HOST_WIDE_INT offset = 0;
1551
1552  gcc_assert (GET_CODE (ref) == MEM);
1553
1554  if (reload_in_progress
1555      && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1556    {
1557      base = find_replacement (&XEXP (ref, 0));
1558
1559      gcc_assert (memory_address_p (GET_MODE (ref), base));
1560    }
1561  else
1562    base = XEXP (ref, 0);
1563
1564  if (GET_CODE (base) == PLUS)
1565    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1566
1567  return plus_constant (base, offset);
1568}
1569
1570/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1571   X is always returned in a register.  */
1572
1573rtx
1574get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1575{
1576  if (GET_CODE (addr) == PLUS)
1577    {
1578      ofs += INTVAL (XEXP (addr, 1));
1579      addr = XEXP (addr, 0);
1580    }
1581
1582  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1583			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1584}
1585
1586/* On the Alpha, all (non-symbolic) constants except zero go into
1587   a floating-point register via memory.  Note that we cannot
1588   return anything that is not a subset of CLASS, and that some
1589   symbolic constants cannot be dropped to memory.  */
1590
1591enum reg_class
1592alpha_preferred_reload_class(rtx x, enum reg_class class)
1593{
1594  /* Zero is present in any register class.  */
1595  if (x == CONST0_RTX (GET_MODE (x)))
1596    return class;
1597
1598  /* These sorts of constants we can easily drop to memory.  */
1599  if (GET_CODE (x) == CONST_INT
1600      || GET_CODE (x) == CONST_DOUBLE
1601      || GET_CODE (x) == CONST_VECTOR)
1602    {
1603      if (class == FLOAT_REGS)
1604	return NO_REGS;
1605      if (class == ALL_REGS)
1606	return GENERAL_REGS;
1607      return class;
1608    }
1609
1610  /* All other kinds of constants should not (and in the case of HIGH
1611     cannot) be dropped to memory -- instead we use a GENERAL_REGS
1612     secondary reload.  */
1613  if (CONSTANT_P (x))
1614    return (class == ALL_REGS ? GENERAL_REGS : class);
1615
1616  return class;
1617}
1618
1619/* Loading and storing HImode or QImode values to and from memory
1620   usually requires a scratch register.  The exceptions are loading
1621   QImode and HImode from an aligned address to a general register
1622   unless byte instructions are permitted.
1623
1624   We also cannot load an unaligned address or a paradoxical SUBREG
1625   into an FP register.
1626
1627   We also cannot do integral arithmetic into FP regs, as might result
1628   from register elimination into a DImode fp register.  */
1629
1630enum reg_class
1631secondary_reload_class (enum reg_class class, enum machine_mode mode,
1632			rtx x, int in)
1633{
1634  if ((mode == QImode || mode == HImode) && ! TARGET_BWX)
1635    {
1636      if (GET_CODE (x) == MEM
1637	  || (GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
1638	  || (GET_CODE (x) == SUBREG
1639	      && (GET_CODE (SUBREG_REG (x)) == MEM
1640		  || (GET_CODE (SUBREG_REG (x)) == REG
1641		      && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER))))
1642	{
1643	  if (!in || !aligned_memory_operand(x, mode))
1644	    return GENERAL_REGS;
1645	}
1646    }
1647
1648  if (class == FLOAT_REGS)
1649    {
1650      if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
1651	return GENERAL_REGS;
1652
1653      if (GET_CODE (x) == SUBREG
1654	  && (GET_MODE_SIZE (GET_MODE (x))
1655	      > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
1656	return GENERAL_REGS;
1657
1658      if (in && INTEGRAL_MODE_P (mode)
1659	  && ! (memory_operand (x, mode) || x == const0_rtx))
1660	return GENERAL_REGS;
1661    }
1662
1663  return NO_REGS;
1664}
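
/* Example (illustrative): without BWX, storing a QImode value to memory
   always needs a GENERAL_REGS scratch, which the reload patterns use for
   the ldq_u/insbl/mskbl/stq_u sequence that merges the byte into its
   containing quadword; likewise an FP register cannot be loaded through
   an ldq_u-style (and ...) address, so GENERAL_REGS is returned there
   too.  */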
1665
1666/* Subfunction of the following function.  Update the flags of any MEM
1667   found in part of X.  */
1668
1669static int
1670alpha_set_memflags_1 (rtx *xp, void *data)
1671{
1672  rtx x = *xp, orig = (rtx) data;
1673
1674  if (GET_CODE (x) != MEM)
1675    return 0;
1676
1677  MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1678  MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig);
1679  MEM_SCALAR_P (x) = MEM_SCALAR_P (orig);
1680  MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1681  MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1682
1683  /* Sadly, we cannot use alias sets because the extra aliasing
1684     produced by the AND interferes.  Given that two-byte quantities
1685     are the only thing we would be able to differentiate anyway,
1686     there does not seem to be any point in convoluting the early
1687     out of the alias check.  */
1688
1689  return -1;
1690}
1691
1692/* Given INSN, which is an INSN list or the PATTERN of a single insn
1693   generated to perform a memory operation, look for any MEMs in either
1694   a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1695   volatile flags from REF into each of the MEMs found.  If REF is not
1696   a MEM, don't do anything.  */
1697
1698void
1699alpha_set_memflags (rtx insn, rtx ref)
1700{
1701  rtx *base_ptr;
1702
1703  if (GET_CODE (ref) != MEM)
1704    return;
1705
1706  /* This is only called from alpha.md, after having had something
1707     generated from one of the insn patterns.  So if everything is
1708     zero, the pattern is already up-to-date.  */
1709  if (!MEM_VOLATILE_P (ref)
1710      && !MEM_IN_STRUCT_P (ref)
1711      && !MEM_SCALAR_P (ref)
1712      && !MEM_NOTRAP_P (ref)
1713      && !MEM_READONLY_P (ref))
1714    return;
1715
1716  if (INSN_P (insn))
1717    base_ptr = &PATTERN (insn);
1718  else
1719    base_ptr = &insn;
1720  for_each_rtx (base_ptr, alpha_set_memflags_1, (void *) ref);
1721}
1722
1723static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1724				 int, bool);
1725
1726/* Internal routine for alpha_emit_set_const to check for N or below insns.
1727   If NO_OUTPUT is true, then we only check to see if N insns are possible,
1728   and return pc_rtx if successful.  */
1729
1730static rtx
1731alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1732			HOST_WIDE_INT c, int n, bool no_output)
1733{
1734  HOST_WIDE_INT new;
1735  int i, bits;
1736  /* Use a pseudo if highly optimizing and still generating RTL.  */
1737  rtx subtarget
1738    = (flag_expensive_optimizations && !no_new_pseudos ? 0 : target);
1739  rtx temp, insn;
1740
1741  /* If this is a sign-extended 32-bit constant, we can do this in at most
1742     three insns, so do it if we have enough insns left.  We always have
1743     a sign-extended 32-bit constant when compiling on a narrow machine.  */
1744
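  /* Worked example (illustrative): c = 0x12348765 gives low = -0x789b
     and high = 0x1235, so the two-insn sequence below amounts to
	ldah	reg,0x1235(zero)
	lda	reg,-0x789b(reg)
     since (0x1235 << 16) + (-0x789b) == 0x12348765.  */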
1745  if (HOST_BITS_PER_WIDE_INT != 64
1746      || c >> 31 == -1 || c >> 31 == 0)
1747    {
1748      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1749      HOST_WIDE_INT tmp1 = c - low;
1750      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1751      HOST_WIDE_INT extra = 0;
1752
1753      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldah insns.  */
1755
1756      if ((high & 0x8000) != 0 && c >= 0)
1757	{
1758	  extra = 0x4000;
1759	  tmp1 -= 0x40000000;
1760	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1761	}
1762
1763      if (c == low || (low == 0 && extra == 0))
1764	{
1765	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1766	     but that meant that we can't handle INT_MIN on 32-bit machines
1767	     (like NT/Alpha), because we recurse indefinitely through
1768	     emit_move_insn to gen_movdi.  So instead, since we know exactly
1769	     what we want, create it explicitly.  */
1770
1771	  if (no_output)
1772	    return pc_rtx;
1773	  if (target == NULL)
1774	    target = gen_reg_rtx (mode);
1775	  emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1776	  return target;
1777	}
1778      else if (n >= 2 + (extra != 0))
1779	{
1780	  if (no_output)
1781	    return pc_rtx;
1782	  if (no_new_pseudos)
1783	    {
1784	      emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1785	      temp = target;
1786	    }
1787	  else
1788	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
1789					  subtarget, mode);
1790
1791	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
1792	     This means that if we go through expand_binop, we'll try to
1793	     generate extensions, etc, which will require new pseudos, which
1794	     will fail during some split phases.  The SImode add patterns
1795	     still exist, but are not named.  So build the insns by hand.  */
1796
1797	  if (extra != 0)
1798	    {
1799	      if (! subtarget)
1800		subtarget = gen_reg_rtx (mode);
1801	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1802	      insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1803	      emit_insn (insn);
1804	      temp = subtarget;
1805	    }
1806
1807	  if (target == NULL)
1808	    target = gen_reg_rtx (mode);
1809	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1810	  insn = gen_rtx_SET (VOIDmode, target, insn);
1811	  emit_insn (insn);
1812	  return target;
1813	}
1814    }
1815
1816  /* If we couldn't do it that way, try some other methods.  But if we have
1817     no instructions left, don't bother.  Likewise, if this is SImode and
1818     we can't make pseudos, we can't do anything since the expand_binop
1819     and expand_unop calls will widen and try to make pseudos.  */
1820
1821  if (n == 1 || (mode == SImode && no_new_pseudos))
1822    return 0;
1823
1824  /* Next, see if we can load a related constant and then shift and possibly
1825     negate it to get the constant we want.  Try this once for each
1826     increasing number of insns.  */
1827
1828  for (i = 1; i < n; i++)
1829    {
1830      /* First, see if we have an easy load of the high bits after
1831	 subtracting some low bits.  */
1832
1833      new = ((c & 0xffff) ^ 0x8000) - 0x8000;
1834      if (new != 0)
1835	{
1836          temp = alpha_emit_set_const (subtarget, mode, c - new, i, no_output);
1837	  if (temp)
1838	    {
1839	      if (no_output)
1840		return temp;
1841	      return expand_binop (mode, add_optab, temp, GEN_INT (new),
1842				   target, 0, OPTAB_WIDEN);
1843	    }
1844	}
1845
1846      /* Next try complementing.  */
1847      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1848      if (temp)
1849	{
1850	  if (no_output)
1851	    return temp;
1852	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1853	}
1854
1855      /* Next try to form a constant and do a left shift.  We can do this
1856	 if some low-order bits are zero; the exact_log2 call below tells
1857	 us that information.  The bits we are shifting out could be any
1858	 value, but here we'll just try the 0- and sign-extended forms of
1859	 the constant.  To try to increase the chance of having the same
1860	 constant in more than one insn, start at the highest number of
1861	 bits to shift, but try all possibilities in case a ZAPNOT will
1862	 be useful.  */
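      /* Worked example (illustrative): c == 0x123000000000 has 36 low-order
	 zero bits, so we can load 0x123 in one insn and then shift it left
	 by 36.  */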
1863
1864      bits = exact_log2 (c & -c);
1865      if (bits > 0)
1866	for (; bits > 0; bits--)
1867	  {
1868	    new = c >> bits;
1869	    temp = alpha_emit_set_const (subtarget, mode, new, i, no_output);
1870	    if (!temp && c < 0)
1871	      {
1872		new = (unsigned HOST_WIDE_INT)c >> bits;
1873		temp = alpha_emit_set_const (subtarget, mode, new,
1874					     i, no_output);
1875	      }
1876	    if (temp)
1877	      {
1878		if (no_output)
1879		  return temp;
1880	        return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1881				     target, 0, OPTAB_WIDEN);
1882	      }
1883	  }
1884
1885      /* Now try high-order zero bits.  Here we try the shifted-in bits as
1886	 all zero and all ones.  Be careful to avoid shifting outside the
1887	 mode and to avoid shifting outside the host wide int size.  */
1888      /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1889	 confuse the recursive call and set all of the high 32 bits.  */
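      /* For illustration on a 64-bit host: c == 0x3fffffffffffffff has two
	 high-order zero bits, so bits == 2; c << 2 == -4 loads in one insn,
	 and a logical right shift by 2 then recovers c.  */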
1890
1891      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1892	      - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1893      if (bits > 0)
1894	for (; bits > 0; bits--)
1895	  {
1896	    new = c << bits;
1897	    temp = alpha_emit_set_const (subtarget, mode, new, i, no_output);
1898	    if (!temp)
1899	      {
1900		new = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1901	        temp = alpha_emit_set_const (subtarget, mode, new,
1902					     i, no_output);
1903	      }
1904	    if (temp)
1905	      {
1906		if (no_output)
1907		  return temp;
1908		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1909				     target, 1, OPTAB_WIDEN);
1910	      }
1911	  }
1912
1913      /* Now try high-order 1 bits.  We get that with a sign-extension.
1914	 But one bit isn't enough here.  Be careful to avoid shifting outside
1915	 the mode and to avoid shifting outside the host wide int size.  */
1916
1917      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1918	      - floor_log2 (~ c) - 2);
1919      if (bits > 0)
1920	for (; bits > 0; bits--)
1921	  {
1922	    new = c << bits;
1923	    temp = alpha_emit_set_const (subtarget, mode, new, i, no_output);
1924	    if (!temp)
1925	      {
1926		new = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1927	        temp = alpha_emit_set_const (subtarget, mode, new,
1928					     i, no_output);
1929	      }
1930	    if (temp)
1931	      {
1932		if (no_output)
1933		  return temp;
1934		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1935				     target, 0, OPTAB_WIDEN);
1936	      }
1937	  }
1938    }
1939
1940#if HOST_BITS_PER_WIDE_INT == 64
1941  /* Finally, see if we can load a value into the target that is the same as
1942     the constant except that all bytes that are 0 are changed to be 0xff.  If we
1943     can, then we can do a ZAPNOT to obtain the desired constant.  */
1944
1945  new = c;
1946  for (i = 0; i < 64; i += 8)
1947    if ((new & ((HOST_WIDE_INT) 0xff << i)) == 0)
1948      new |= (HOST_WIDE_INT) 0xff << i;
1949
1950  /* We are only called for SImode and DImode.  If this is SImode, ensure that
1951     we are sign extended to a full word.  */
1952
1953  if (mode == SImode)
1954    new = ((new & 0xffffffff) ^ 0x80000000) - 0x80000000;
1955
1956  if (new != c)
1957    {
1958      temp = alpha_emit_set_const (subtarget, mode, new, n - 1, no_output);
1959      if (temp)
1960	{
1961	  if (no_output)
1962	    return temp;
1963	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new),
1964			       target, 0, OPTAB_WIDEN);
1965	}
1966    }
1967#endif
1968
1969  return 0;
1970}
1971
1972/* Try to output insns to set TARGET equal to the constant C if it can be
1973   done in N or fewer insns.  Do all computations in MODE.  Returns the place
1974   where the output has been placed if it can be done and the insns have been
1975   emitted.  If it would take more than N insns, zero is returned and no
1976   insns are emitted.  */
1977
1978static rtx
1979alpha_emit_set_const (rtx target, enum machine_mode mode,
1980		      HOST_WIDE_INT c, int n, bool no_output)
1981{
1982  enum machine_mode orig_mode = mode;
1983  rtx orig_target = target;
1984  rtx result = 0;
1985  int i;
1986
1987  /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1988     can't load this constant in one insn, do this in DImode.  */
1989  if (no_new_pseudos && mode == SImode
1990      && GET_CODE (target) == REG && REGNO (target) < FIRST_PSEUDO_REGISTER)
1991    {
1992      result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1993      if (result)
1994	return result;
1995
1996      target = no_output ? NULL : gen_lowpart (DImode, target);
1997      mode = DImode;
1998    }
1999  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2000    {
2001      target = no_output ? NULL : gen_lowpart (DImode, target);
2002      mode = DImode;
2003    }
2004
2005  /* Try 1 insn, then 2, then up to N.  */
2006  for (i = 1; i <= n; i++)
2007    {
2008      result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2009      if (result)
2010	{
2011	  rtx insn, set;
2012
2013	  if (no_output)
2014	    return result;
2015
2016	  insn = get_last_insn ();
2017	  set = single_set (insn);
2018	  if (! CONSTANT_P (SET_SRC (set)))
2019	    set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2020	  break;
2021	}
2022    }
2023
2024  /* Allow for the case where we changed the mode of TARGET.  */
2025  if (result)
2026    {
2027      if (result == target)
2028	result = orig_target;
2029      else if (mode != orig_mode)
2030	result = gen_lowpart (orig_mode, result);
2031    }
2032
2033  return result;
2034}
2035
2036/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2037   fall back to a straightforward decomposition.  We do this to avoid
2038   exponential run times encountered when looking for longer sequences
2039   with alpha_emit_set_const.  */
2040
2041static rtx
2042alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
2043{
2044  HOST_WIDE_INT d1, d2, d3, d4;
2045
2046  /* Decompose the entire word */
2047#if HOST_BITS_PER_WIDE_INT >= 64
2048  gcc_assert (c2 == -(c1 < 0));
2049  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2050  c1 -= d1;
2051  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2052  c1 = (c1 - d2) >> 32;
2053  d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2054  c1 -= d3;
2055  d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2056  gcc_assert (c1 == d4);
2057#else
2058  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2059  c1 -= d1;
2060  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2061  gcc_assert (c1 == d2);
2062  c2 += (d2 < 0);
2063  d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
2064  c2 -= d3;
2065  d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2066  gcc_assert (c2 == d4);
2067#endif
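  /* Worked example (illustrative): c == 0x1234567887654321 decomposes into
     d1 == 0x4321, d2 == -0x789b0000, d3 == 0x5679 and d4 == 0x12340000, and
     ((0x12340000 + 0x5679) << 32) + (-0x789b0000) + 0x4321 rebuilds c.  */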
2068
2069  /* Construct the high word */
2070  if (d4)
2071    {
2072      emit_move_insn (target, GEN_INT (d4));
2073      if (d3)
2074	emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2075    }
2076  else
2077    emit_move_insn (target, GEN_INT (d3));
2078
2079  /* Shift it into place */
2080  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2081
2082  /* Add in the low bits.  */
2083  if (d2)
2084    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2085  if (d1)
2086    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2087
2088  return target;
2089}
2090
2091/* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
2092   the low 64 bits.  */
2093
2094static void
2095alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2096{
2097  HOST_WIDE_INT i0, i1;
2098
2099  if (GET_CODE (x) == CONST_VECTOR)
2100    x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2101
2103  if (GET_CODE (x) == CONST_INT)
2104    {
2105      i0 = INTVAL (x);
2106      i1 = -(i0 < 0);
2107    }
2108  else if (HOST_BITS_PER_WIDE_INT >= 64)
2109    {
2110      i0 = CONST_DOUBLE_LOW (x);
2111      i1 = -(i0 < 0);
2112    }
2113  else
2114    {
2115      i0 = CONST_DOUBLE_LOW (x);
2116      i1 = CONST_DOUBLE_HIGH (x);
2117    }
2118
2119  *p0 = i0;
2120  *p1 = i1;
2121}
2122
2123/* Implement LEGITIMATE_CONSTANT_P.  This is all constants for which we
2124   are willing to load the value into a register via a move pattern.
2125   Normally this is all symbolic constants, integral constants that
2126   take three or fewer instructions, and floating-point zero.  */
2127
2128bool
2129alpha_legitimate_constant_p (rtx x)
2130{
2131  enum machine_mode mode = GET_MODE (x);
2132  HOST_WIDE_INT i0, i1;
2133
2134  switch (GET_CODE (x))
2135    {
2136    case CONST:
2137    case LABEL_REF:
2138    case HIGH:
2139      return true;
2140
2141    case SYMBOL_REF:
2142      /* TLS symbols are never valid.  */
2143      return SYMBOL_REF_TLS_MODEL (x) == 0;
2144
2145    case CONST_DOUBLE:
2146      if (x == CONST0_RTX (mode))
2147	return true;
2148      if (FLOAT_MODE_P (mode))
2149	return false;
2150      goto do_integer;
2151
2152    case CONST_VECTOR:
2153      if (x == CONST0_RTX (mode))
2154	return true;
2155      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2156	return false;
2157      if (GET_MODE_SIZE (mode) != 8)
2158	return false;
2159      goto do_integer;
2160
2161    case CONST_INT:
2162    do_integer:
2163      if (TARGET_BUILD_CONSTANTS)
2164	return true;
2165      alpha_extract_integer (x, &i0, &i1);
2166      if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2167        return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2168      return false;
2169
2170    default:
2171      return false;
2172    }
2173}
2174
2175/* Operand 1 is known to be a constant, and should require more than one
2176   instruction to load.  Emit that multi-part load.  */
2177
2178bool
2179alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2180{
2181  HOST_WIDE_INT i0, i1;
2182  rtx temp = NULL_RTX;
2183
2184  alpha_extract_integer (operands[1], &i0, &i1);
2185
2186  if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2187    temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2188
2189  if (!temp && TARGET_BUILD_CONSTANTS)
2190    temp = alpha_emit_set_long_const (operands[0], i0, i1);
2191
2192  if (temp)
2193    {
2194      if (!rtx_equal_p (operands[0], temp))
2195	emit_move_insn (operands[0], temp);
2196      return true;
2197    }
2198
2199  return false;
2200}
2201
2202/* Expand a move instruction; return true if all work is done.
2203   We don't handle non-bwx subword loads here.  */
2204
2205bool
2206alpha_expand_mov (enum machine_mode mode, rtx *operands)
2207{
2208  /* If the output is not a register, the input must be.  */
2209  if (GET_CODE (operands[0]) == MEM
2210      && ! reg_or_0_operand (operands[1], mode))
2211    operands[1] = force_reg (mode, operands[1]);
2212
2213  /* Allow legitimize_address to perform some simplifications.  */
2214  if (mode == Pmode && symbolic_operand (operands[1], mode))
2215    {
2216      rtx tmp;
2217
2218      tmp = alpha_legitimize_address (operands[1], operands[0], mode);
2219      if (tmp)
2220	{
2221	  if (tmp == operands[0])
2222	    return true;
2223	  operands[1] = tmp;
2224	  return false;
2225	}
2226    }
2227
2228  /* Early out for non-constants and valid constants.  */
2229  if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2230    return false;
2231
2232  /* Split large integers.  */
2233  if (GET_CODE (operands[1]) == CONST_INT
2234      || GET_CODE (operands[1]) == CONST_DOUBLE
2235      || GET_CODE (operands[1]) == CONST_VECTOR)
2236    {
2237      if (alpha_split_const_mov (mode, operands))
2238	return true;
2239    }
2240
2241  /* Otherwise we've nothing left but to drop the thing to memory.  */
2242  operands[1] = force_const_mem (mode, operands[1]);
2243  if (reload_in_progress)
2244    {
2245      emit_move_insn (operands[0], XEXP (operands[1], 0));
2246      operands[1] = copy_rtx (operands[1]);
2247      XEXP (operands[1], 0) = operands[0];
2248    }
2249  else
2250    operands[1] = validize_mem (operands[1]);
2251  return false;
2252}
2253
2254/* Expand a non-bwx QImode or HImode move instruction;
2255   return true if all work is done.  */
2256
2257bool
2258alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2259{
2260  /* If the output is not a register, the input must be.  */
2261  if (GET_CODE (operands[0]) == MEM)
2262    operands[1] = force_reg (mode, operands[1]);
2263
2264  /* Handle four memory cases, unaligned and aligned for either the input
2265     or the output.  The only case where we can be called during reload is
2266     for aligned loads; all other cases require temporaries.  */
2267
2268  if (GET_CODE (operands[1]) == MEM
2269      || (GET_CODE (operands[1]) == SUBREG
2270	  && GET_CODE (SUBREG_REG (operands[1])) == MEM)
2271      || (reload_in_progress && GET_CODE (operands[1]) == REG
2272	  && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER)
2273      || (reload_in_progress && GET_CODE (operands[1]) == SUBREG
2274	  && GET_CODE (SUBREG_REG (operands[1])) == REG
2275	  && REGNO (SUBREG_REG (operands[1])) >= FIRST_PSEUDO_REGISTER))
2276    {
2277      if (aligned_memory_operand (operands[1], mode))
2278	{
2279	  if (reload_in_progress)
2280	    {
2281	      emit_insn ((mode == QImode
2282			  ? gen_reload_inqi_help
2283			  : gen_reload_inhi_help)
2284		         (operands[0], operands[1],
2285			  gen_rtx_REG (SImode, REGNO (operands[0]))));
2286	    }
2287	  else
2288	    {
2289	      rtx aligned_mem, bitnum;
2290	      rtx scratch = gen_reg_rtx (SImode);
2291	      rtx subtarget;
2292	      bool copyout;
2293
2294	      get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2295
2296	      subtarget = operands[0];
2297	      if (GET_CODE (subtarget) == REG)
2298		subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2299	      else
2300		subtarget = gen_reg_rtx (DImode), copyout = true;
2301
2302	      emit_insn ((mode == QImode
2303			  ? gen_aligned_loadqi
2304			  : gen_aligned_loadhi)
2305			 (subtarget, aligned_mem, bitnum, scratch));
2306
2307	      if (copyout)
2308		emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2309	    }
2310	}
2311      else
2312	{
2313	  /* Don't pass these as parameters since that makes the generated
2314	     code depend on parameter evaluation order which will cause
2315	     bootstrap failures.  */
2316
2317	  rtx temp1, temp2, seq, subtarget;
2318	  bool copyout;
2319
2320	  temp1 = gen_reg_rtx (DImode);
2321	  temp2 = gen_reg_rtx (DImode);
2322
2323	  subtarget = operands[0];
2324	  if (GET_CODE (subtarget) == REG)
2325	    subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2326	  else
2327	    subtarget = gen_reg_rtx (DImode), copyout = true;
2328
2329	  seq = ((mode == QImode
2330		  ? gen_unaligned_loadqi
2331		  : gen_unaligned_loadhi)
2332		 (subtarget, get_unaligned_address (operands[1]),
2333		  temp1, temp2));
2334	  alpha_set_memflags (seq, operands[1]);
2335	  emit_insn (seq);
2336
2337	  if (copyout)
2338	    emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2339	}
2340      return true;
2341    }
2342
2343  if (GET_CODE (operands[0]) == MEM
2344      || (GET_CODE (operands[0]) == SUBREG
2345	  && GET_CODE (SUBREG_REG (operands[0])) == MEM)
2346      || (reload_in_progress && GET_CODE (operands[0]) == REG
2347	  && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
2348      || (reload_in_progress && GET_CODE (operands[0]) == SUBREG
2349	  && GET_CODE (SUBREG_REG (operands[0])) == REG
2350	  && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
2351    {
2352      if (aligned_memory_operand (operands[0], mode))
2353	{
2354	  rtx aligned_mem, bitnum;
2355	  rtx temp1 = gen_reg_rtx (SImode);
2356	  rtx temp2 = gen_reg_rtx (SImode);
2357
2358	  get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2359
2360	  emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2361					temp1, temp2));
2362	}
2363      else
2364	{
2365	  rtx temp1 = gen_reg_rtx (DImode);
2366	  rtx temp2 = gen_reg_rtx (DImode);
2367	  rtx temp3 = gen_reg_rtx (DImode);
2368	  rtx seq = ((mode == QImode
2369		      ? gen_unaligned_storeqi
2370		      : gen_unaligned_storehi)
2371		     (get_unaligned_address (operands[0]),
2372		      operands[1], temp1, temp2, temp3));
2373
2374	  alpha_set_memflags (seq, operands[0]);
2375	  emit_insn (seq);
2376	}
2377      return true;
2378    }
2379
2380  return false;
2381}
2382
2383/* Implement the movmisalign patterns.  One of the operands is a memory
2384   that is not naturally aligned.  Emit instructions to load it.  */
2385
2386void
2387alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2388{
2389  /* Honor misaligned loads, for those we promised to do so.  */
2390  if (MEM_P (operands[1]))
2391    {
2392      rtx tmp;
2393
2394      if (register_operand (operands[0], mode))
2395	tmp = operands[0];
2396      else
2397	tmp = gen_reg_rtx (mode);
2398
2399      alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2400      if (tmp != operands[0])
2401	emit_move_insn (operands[0], tmp);
2402    }
2403  else if (MEM_P (operands[0]))
2404    {
2405      if (!reg_or_0_operand (operands[1], mode))
2406	operands[1] = force_reg (mode, operands[1]);
2407      alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2408    }
2409  else
2410    gcc_unreachable ();
2411}
2412
2413/* Generate an unsigned DImode to FP conversion.  This is the same code
2414   optabs would emit if we didn't have TFmode patterns.
2415
2416   For SFmode, this is the only construction I've found that can pass
2417   gcc.c-torture/execute/ieee/rbug.c.  No scenario that uses DFmode
2418   intermediates will work, because you'll get intermediate rounding
2419   that ruins the end result.  Some of this could be fixed by turning
2420   on round-to-positive-infinity, but that requires diddling the fpsr,
2421   which kills performance.  I tried turning this around and converting
2422   to a negative number, so that I could turn on /m, but either I did
2423   it wrong or there's something else, because I wound up with the exact
2424   same single-bit error.  There is a branch-less form of this same code:
2425
2426	srl     $16,1,$1
2427	and     $16,1,$2
2428	cmplt   $16,0,$3
2429	or      $1,$2,$2
2430	cmovge  $16,$16,$2
2431	itoft	$3,$f10
2432	itoft	$2,$f11
2433	cvtqs   $f11,$f11
2434	adds    $f11,$f11,$f0
2435	fcmoveq $f10,$f11,$f0
2436
2437   I'm not using it because it's the same number of instructions as
2438   this branch-full form, and it has more serialized long latency
2439   instructions on the critical path.
2440
2441   For DFmode, we can avoid rounding errors by breaking up the word
2442   into two pieces, converting them separately, and adding them back:
2443
2444   LC0: .long 0,0x5f800000
2445
2446	itoft	$16,$f11
2447	lda	$2,LC0
2448	cmplt	$16,0,$1
2449	cpyse	$f11,$f31,$f10
2450	cpyse	$f31,$f11,$f11
2451	s4addq	$1,$2,$1
2452	lds	$f12,0($1)
2453	cvtqt	$f10,$f10
2454	cvtqt	$f11,$f11
2455	addt	$f12,$f10,$f0
2456	addt	$f0,$f11,$f0
2457
2458   This doesn't seem to be a clear-cut win over the optabs form.
2459   It probably all depends on the distribution of numbers being
2460   converted -- in the optabs form, all but high-bit-set has a
2461   much lower minimum execution time.  */
2462
2463void
2464alpha_emit_floatuns (rtx operands[2])
2465{
2466  rtx neglab, donelab, i0, i1, f0, in, out;
2467  enum machine_mode mode;
2468
2469  out = operands[0];
2470  in = force_reg (DImode, operands[1]);
2471  mode = GET_MODE (out);
2472  neglab = gen_label_rtx ();
2473  donelab = gen_label_rtx ();
2474  i0 = gen_reg_rtx (DImode);
2475  i1 = gen_reg_rtx (DImode);
2476  f0 = gen_reg_rtx (mode);
2477
2478  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2479
2480  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2481  emit_jump_insn (gen_jump (donelab));
2482  emit_barrier ();
2483
2484  emit_label (neglab);
2485
2486  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2487  emit_insn (gen_anddi3 (i1, in, const1_rtx));
2488  emit_insn (gen_iordi3 (i0, i0, i1));
2489  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2490  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2491
2492  emit_label (donelab);
2493}
2494
2495/* Generate the comparison for a conditional branch.  */
2496
2497rtx
2498alpha_emit_conditional_branch (enum rtx_code code)
2499{
2500  enum rtx_code cmp_code, branch_code;
2501  enum machine_mode cmp_mode, branch_mode = VOIDmode;
2502  rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
2503  rtx tem;
2504
2505  if (alpha_compare.fp_p && GET_MODE (op0) == TFmode)
2506    {
2507      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2508      op1 = const0_rtx;
2509      alpha_compare.fp_p = 0;
2510    }
2511
2512  /* The general case: fold the comparison code to the types of compares
2513     that we have, choosing the branch as necessary.  */
2514  switch (code)
2515    {
2516    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2517    case UNORDERED:
2518      /* We have these compares: */
2519      cmp_code = code, branch_code = NE;
2520      break;
2521
2522    case NE:
2523    case ORDERED:
2524      /* These must be reversed.  */
2525      cmp_code = reverse_condition (code), branch_code = EQ;
2526      break;
2527
2528    case GE:  case GT: case GEU:  case GTU:
2529      /* For FP, we swap them, for INT, we reverse them.  */
2530      if (alpha_compare.fp_p)
2531	{
2532	  cmp_code = swap_condition (code);
2533	  branch_code = NE;
2534	  tem = op0, op0 = op1, op1 = tem;
2535	}
2536      else
2537	{
2538	  cmp_code = reverse_condition (code);
2539	  branch_code = EQ;
2540	}
2541      break;
2542
2543    default:
2544      gcc_unreachable ();
2545    }
2546
2547  if (alpha_compare.fp_p)
2548    {
2549      cmp_mode = DFmode;
2550      if (flag_unsafe_math_optimizations)
2551	{
2552	  /* When we are not as concerned about non-finite values, and we
2553	     are comparing against zero, we can branch directly.  */
2554	  if (op1 == CONST0_RTX (DFmode))
2555	    cmp_code = UNKNOWN, branch_code = code;
2556	  else if (op0 == CONST0_RTX (DFmode))
2557	    {
2558	      /* Undo the swap we probably did just above.  */
2559	      tem = op0, op0 = op1, op1 = tem;
2560	      branch_code = swap_condition (cmp_code);
2561	      cmp_code = UNKNOWN;
2562	    }
2563	}
2564      else
2565	{
2566	  /* ??? We mark the branch mode to be CCmode to prevent the
2567	     compare and branch from being combined, since the compare
2568	     insn follows IEEE rules that the branch does not.  */
2569	  branch_mode = CCmode;
2570	}
2571    }
2572  else
2573    {
2574      cmp_mode = DImode;
2575
2576      /* The following optimizations are only for signed compares.  */
2577      if (code != LEU && code != LTU && code != GEU && code != GTU)
2578	{
2579	  /* Whee.  Compare and branch against 0 directly.  */
2580	  if (op1 == const0_rtx)
2581	    cmp_code = UNKNOWN, branch_code = code;
2582
2583	  /* If the constant doesn't fit into an immediate, but can
2584 	     be generated by lda/ldah, we adjust the argument and
2585 	     compare against zero, so we can use beq/bne directly.  */
2586	  /* ??? Don't do this when comparing against symbols, otherwise
2587	     we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2588	     be declared false out of hand (at least for non-weak).  */
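	  /* E.g. (illustrative): for op1 == 4096, the value is too big for
	     the 8-bit compare immediate, but -4096 fits an lda, so we emit
	     tem = op0 + -4096 and then branch on tem against zero.  */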
2589	  else if (GET_CODE (op1) == CONST_INT
2590		   && (code == EQ || code == NE)
2591		   && !(symbolic_operand (op0, VOIDmode)
2592			|| (GET_CODE (op0) == REG && REG_POINTER (op0))))
2593	    {
2594	      HOST_WIDE_INT v = INTVAL (op1), n = -v;
2595
2596	      if (! CONST_OK_FOR_LETTER_P (v, 'I')
2597		  && (CONST_OK_FOR_LETTER_P (n, 'K')
2598		      || CONST_OK_FOR_LETTER_P (n, 'L')))
2599		{
2600		  cmp_code = PLUS, branch_code = code;
2601		  op1 = GEN_INT (n);
2602		}
2603	    }
2604	}
2605
2606      if (!reg_or_0_operand (op0, DImode))
2607	op0 = force_reg (DImode, op0);
2608      if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2609	op1 = force_reg (DImode, op1);
2610    }
2611
2612  /* Emit an initial compare instruction, if necessary.  */
2613  tem = op0;
2614  if (cmp_code != UNKNOWN)
2615    {
2616      tem = gen_reg_rtx (cmp_mode);
2617      emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2618    }
2619
2620  /* Zero the operands.  */
2621  memset (&alpha_compare, 0, sizeof (alpha_compare));
2622
2623  /* Return the branch comparison.  */
2624  return gen_rtx_fmt_ee (branch_code, branch_mode, tem, CONST0_RTX (cmp_mode));
2625}
2626
2627/* Certain simplifications can be done to make invalid setcc operations
2628   valid.  Return the final comparison, or NULL if we can't work.  */
2629
2630rtx
2631alpha_emit_setcc (enum rtx_code code)
2632{
2633  enum rtx_code cmp_code;
2634  rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
2635  int fp_p = alpha_compare.fp_p;
2636  rtx tmp;
2637
2638  /* Zero the operands.  */
2639  memset (&alpha_compare, 0, sizeof (alpha_compare));
2640
2641  if (fp_p && GET_MODE (op0) == TFmode)
2642    {
2643      op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2644      op1 = const0_rtx;
2645      fp_p = 0;
2646    }
2647
2648  if (fp_p && !TARGET_FIX)
2649    return NULL_RTX;
2650
2651  /* The general case: fold the comparison code to the types of compares
2652     that we have, choosing the branch as necessary.  */
2653
2654  cmp_code = UNKNOWN;
2655  switch (code)
2656    {
2657    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2658    case UNORDERED:
2659      /* We have these compares.  */
2660      if (fp_p)
2661	cmp_code = code, code = NE;
2662      break;
2663
2664    case NE:
2665      if (!fp_p && op1 == const0_rtx)
2666	break;
2667      /* FALLTHRU */
2668
2669    case ORDERED:
2670      cmp_code = reverse_condition (code);
2671      code = EQ;
2672      break;
2673
2674    case GE:  case GT: case GEU:  case GTU:
2675      /* These normally need swapping, but for integer zero we have
2676	 special patterns that recognize swapped operands.  */
2677      if (!fp_p && op1 == const0_rtx)
2678	break;
2679      code = swap_condition (code);
2680      if (fp_p)
2681	cmp_code = code, code = NE;
2682      tmp = op0, op0 = op1, op1 = tmp;
2683      break;
2684
2685    default:
2686      gcc_unreachable ();
2687    }
2688
2689  if (!fp_p)
2690    {
2691      if (!register_operand (op0, DImode))
2692	op0 = force_reg (DImode, op0);
2693      if (!reg_or_8bit_operand (op1, DImode))
2694	op1 = force_reg (DImode, op1);
2695    }
2696
2697  /* Emit an initial compare instruction, if necessary.  */
2698  if (cmp_code != UNKNOWN)
2699    {
2700      enum machine_mode mode = fp_p ? DFmode : DImode;
2701
2702      tmp = gen_reg_rtx (mode);
2703      emit_insn (gen_rtx_SET (VOIDmode, tmp,
2704			      gen_rtx_fmt_ee (cmp_code, mode, op0, op1)));
2705
2706      op0 = fp_p ? gen_lowpart (DImode, tmp) : tmp;
2707      op1 = const0_rtx;
2708    }
2709
2710  /* Return the setcc comparison.  */
2711  return gen_rtx_fmt_ee (code, DImode, op0, op1);
2712}
2713
2714
2715/* Rewrite a comparison against zero CMP of the form
2716   (CODE (cc0) (const_int 0)) so it can be written validly in
2717   a conditional move (if_then_else CMP ...).
2718   If both of the operands that set cc0 are nonzero we must emit
2719   an insn to perform the compare (it can't be done within
2720   the conditional move).  */
2721
2722rtx
2723alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2724{
2725  enum rtx_code code = GET_CODE (cmp);
2726  enum rtx_code cmov_code = NE;
2727  rtx op0 = alpha_compare.op0;
2728  rtx op1 = alpha_compare.op1;
2729  int fp_p = alpha_compare.fp_p;
2730  enum machine_mode cmp_mode
2731    = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2732  enum machine_mode cmp_op_mode = fp_p ? DFmode : DImode;
2733  enum machine_mode cmov_mode = VOIDmode;
2734  int local_fast_math = flag_unsafe_math_optimizations;
2735  rtx tem;
2736
2737  /* Zero the operands.  */
2738  memset (&alpha_compare, 0, sizeof (alpha_compare));
2739
2740  if (fp_p != FLOAT_MODE_P (mode))
2741    {
2742      enum rtx_code cmp_code;
2743
2744      if (! TARGET_FIX)
2745	return 0;
2746
2747      /* If we have fp<->int register move instructions, do a cmov by
2748	 performing the comparison in fp registers, and move the
2749	 zero/nonzero value to integer registers, where we can then
2750	 use a normal cmov, or vice-versa.  */
2751
2752      switch (code)
2753	{
2754	case EQ: case LE: case LT: case LEU: case LTU:
2755	  /* We have these compares.  */
2756	  cmp_code = code, code = NE;
2757	  break;
2758
2759	case NE:
2760	  /* This must be reversed.  */
2761	  cmp_code = EQ, code = EQ;
2762	  break;
2763
2764	case GE: case GT: case GEU: case GTU:
2765	  /* These normally need swapping, but for integer zero we have
2766	     special patterns that recognize swapped operands.  */
2767	  if (!fp_p && op1 == const0_rtx)
2768	    cmp_code = code, code = NE;
2769	  else
2770	    {
2771	      cmp_code = swap_condition (code);
2772	      code = NE;
2773	      tem = op0, op0 = op1, op1 = tem;
2774	    }
2775	  break;
2776
2777	default:
2778	  gcc_unreachable ();
2779	}
2780
2781      tem = gen_reg_rtx (cmp_op_mode);
2782      emit_insn (gen_rtx_SET (VOIDmode, tem,
2783			      gen_rtx_fmt_ee (cmp_code, cmp_op_mode,
2784					      op0, op1)));
2785
2786      cmp_mode = cmp_op_mode = fp_p ? DImode : DFmode;
2787      op0 = gen_lowpart (cmp_op_mode, tem);
2788      op1 = CONST0_RTX (cmp_op_mode);
2789      fp_p = !fp_p;
2790      local_fast_math = 1;
2791    }
2792
2793  /* We may be able to use a conditional move directly.
2794     This avoids emitting spurious compares.  */
2795  if (signed_comparison_operator (cmp, VOIDmode)
2796      && (!fp_p || local_fast_math)
2797      && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2798    return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2799
2800  /* We can't put the comparison inside the conditional move;
2801     emit a compare instruction and put that inside the
2802     conditional move.  Make sure we emit only comparisons we have;
2803     swap or reverse as necessary.  */
2804
2805  if (no_new_pseudos)
2806    return NULL_RTX;
2807
2808  switch (code)
2809    {
2810    case EQ:  case LE:  case LT:  case LEU:  case LTU:
2811      /* We have these compares: */
2812      break;
2813
2814    case NE:
2815      /* This must be reversed.  */
2816      code = reverse_condition (code);
2817      cmov_code = EQ;
2818      break;
2819
2820    case GE:  case GT:  case GEU:  case GTU:
2821      /* These must be swapped.  */
2822      if (op1 != CONST0_RTX (cmp_mode))
2823	{
2824	  code = swap_condition (code);
2825	  tem = op0, op0 = op1, op1 = tem;
2826	}
2827      break;
2828
2829    default:
2830      gcc_unreachable ();
2831    }
2832
2833  if (!fp_p)
2834    {
2835      if (!reg_or_0_operand (op0, DImode))
2836	op0 = force_reg (DImode, op0);
2837      if (!reg_or_8bit_operand (op1, DImode))
2838	op1 = force_reg (DImode, op1);
2839    }
2840
2841  /* ??? We mark the branch mode to be CCmode to prevent the compare
2842     and cmov from being combined, since the compare insn follows IEEE
2843     rules that the cmov does not.  */
2844  if (fp_p && !local_fast_math)
2845    cmov_mode = CCmode;
2846
2847  tem = gen_reg_rtx (cmp_op_mode);
2848  emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
2849  return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
2850}
2851
2852/* Simplify a conditional move of two constants into a setcc with
2853   arithmetic.  This is done with a splitter since combine would
2854   just undo the work if done during code generation.  It also catches
2855   cases we wouldn't have before cse.  */
2856
2857int
2858alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2859			      rtx t_rtx, rtx f_rtx)
2860{
2861  HOST_WIDE_INT t, f, diff;
2862  enum machine_mode mode;
2863  rtx target, subtarget, tmp;
2864
2865  mode = GET_MODE (dest);
2866  t = INTVAL (t_rtx);
2867  f = INTVAL (f_rtx);
2868  diff = t - f;
2869
2870  if (((code == NE || code == EQ) && diff < 0)
2871      || (code == GE || code == GT))
2872    {
2873      code = reverse_condition (code);
2874      diff = t, t = f, f = diff;
2875      diff = t - f;
2876    }
2877
2878  subtarget = target = dest;
2879  if (mode != DImode)
2880    {
2881      target = gen_lowpart (DImode, dest);
2882      if (! no_new_pseudos)
2883        subtarget = gen_reg_rtx (DImode);
2884      else
2885	subtarget = target;
2886    }
2887  /* Below, we must be careful to use copy_rtx on target and subtarget
2888     in intermediate insns, as they may be a subreg rtx, which may not
2889     be shared.  */
2890
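  /* Illustrative case for the first branch below: t == 8, f == 0 with an NE
     condition sets SUBTARGET to (cond != 0) and then shifts it left by
     exact_log2 (8) == 3, giving 0 or 8 without a conditional move.  */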
2891  if (f == 0 && exact_log2 (diff) > 0
2892      /* On EV6, we've got enough shifters to make non-arithmetic shifts
2893	 viable over a longer latency cmove.  On EV5, the E0 slot is a
2894	 scarce resource, and on EV4 shift has the same latency as a cmove.  */
2895      && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2896    {
2897      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2898      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2899
2900      tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2901			    GEN_INT (exact_log2 (t)));
2902      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2903    }
2904  else if (f == 0 && t == -1)
2905    {
2906      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2907      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2908
2909      emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2910    }
2911  else if (diff == 1 || diff == 4 || diff == 8)
2912    {
2913      rtx add_op;
2914
2915      tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2916      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2917
2918      if (diff == 1)
2919	emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2920      else
2921	{
2922	  add_op = GEN_INT (f);
2923	  if (sext_add_operand (add_op, mode))
2924	    {
2925	      tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2926				  GEN_INT (diff));
2927	      tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2928	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2929	    }
2930	  else
2931	    return 0;
2932	}
2933    }
2934  else
2935    return 0;
2936
2937  return 1;
2938}
2939
2940/* Look up the X_floating library function name for the
2941   given operation.  */
2942
2943struct xfloating_op GTY(())
2944{
2945  const enum rtx_code code;
2946  const char *const GTY((skip)) osf_func;
2947  const char *const GTY((skip)) vms_func;
2948  rtx libcall;
2949};
2950
2951static GTY(()) struct xfloating_op xfloating_ops[] =
2952{
2953  { PLUS,		"_OtsAddX", "OTS$ADD_X", 0 },
2954  { MINUS,		"_OtsSubX", "OTS$SUB_X", 0 },
2955  { MULT,		"_OtsMulX", "OTS$MUL_X", 0 },
2956  { DIV,		"_OtsDivX", "OTS$DIV_X", 0 },
2957  { EQ,			"_OtsEqlX", "OTS$EQL_X", 0 },
2958  { NE,			"_OtsNeqX", "OTS$NEQ_X", 0 },
2959  { LT,			"_OtsLssX", "OTS$LSS_X", 0 },
2960  { LE,			"_OtsLeqX", "OTS$LEQ_X", 0 },
2961  { GT,			"_OtsGtrX", "OTS$GTR_X", 0 },
2962  { GE,			"_OtsGeqX", "OTS$GEQ_X", 0 },
2963  { FIX,		"_OtsCvtXQ", "OTS$CVTXQ", 0 },
2964  { FLOAT,		"_OtsCvtQX", "OTS$CVTQX", 0 },
2965  { UNSIGNED_FLOAT,	"_OtsCvtQUX", "OTS$CVTQUX", 0 },
2966  { FLOAT_EXTEND,	"_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2967  { FLOAT_TRUNCATE,	"_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2968};
2969
2970static GTY(()) struct xfloating_op vax_cvt_ops[] =
2971{
2972  { FLOAT_EXTEND,	"_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2973  { FLOAT_TRUNCATE,	"_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2974};
2975
2976static rtx
2977alpha_lookup_xfloating_lib_func (enum rtx_code code)
2978{
2979  struct xfloating_op *ops = xfloating_ops;
2980  long n = ARRAY_SIZE (xfloating_ops);
2981  long i;
2982
2983  gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2984
2985  /* How irritating.  Nothing to key off for the main table.  */
2986  if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2987    {
2988      ops = vax_cvt_ops;
2989      n = ARRAY_SIZE (vax_cvt_ops);
2990    }
2991
2992  for (i = 0; i < n; ++i, ++ops)
2993    if (ops->code == code)
2994      {
2995	rtx func = ops->libcall;
2996	if (!func)
2997	  {
2998	    func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2999				     ? ops->vms_func : ops->osf_func);
3000	    ops->libcall = func;
3001	  }
3002        return func;
3003      }
3004
3005  gcc_unreachable ();
3006}
3007
3008/* Most X_floating operations take the rounding mode as an argument.
3009   Compute that here.  */
3010
3011static int
3012alpha_compute_xfloating_mode_arg (enum rtx_code code,
3013				  enum alpha_fp_rounding_mode round)
3014{
3015  int mode;
3016
3017  switch (round)
3018    {
3019    case ALPHA_FPRM_NORM:
3020      mode = 2;
3021      break;
3022    case ALPHA_FPRM_MINF:
3023      mode = 1;
3024      break;
3025    case ALPHA_FPRM_CHOP:
3026      mode = 0;
3027      break;
3028    case ALPHA_FPRM_DYN:
3029      mode = 4;
3030      break;
3031    default:
3032      gcc_unreachable ();
3033
3034    /* XXX For reference, round to +inf is mode = 3.  */
3035    }
3036
3037  if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3038    mode |= 0x10000;
3039
3040  return mode;
3041}
3042
3043/* Emit an X_floating library function call.
3044
3045   Note that these functions do not follow normal calling conventions:
3046   TFmode arguments are passed in two integer registers (as opposed to
3047   indirect); TFmode return values appear in R16+R17.
3048
3049   FUNC is the function to call.
3050   TARGET is where the output belongs.
3051   OPERANDS are the inputs.
3052   NOPERANDS is the count of inputs.
3053   EQUIV is the expression equivalent for the function.
3054*/
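/* For example (a sketch of the loop below, not an additional constraint):
   a two-operand arithmetic call such as _OtsAddX receives its first TFmode
   argument in $16/$17, its second in $18/$19, and the DImode rounding-mode
   argument in $20, while the TFmode result comes back in $16/$17.  */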
3055
3056static void
3057alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3058			      int noperands, rtx equiv)
3059{
3060  rtx usage = NULL_RTX, tmp, reg;
3061  int regno = 16, i;
3062
3063  start_sequence ();
3064
3065  for (i = 0; i < noperands; ++i)
3066    {
3067      switch (GET_MODE (operands[i]))
3068	{
3069	case TFmode:
3070	  reg = gen_rtx_REG (TFmode, regno);
3071	  regno += 2;
3072	  break;
3073
3074	case DFmode:
3075	  reg = gen_rtx_REG (DFmode, regno + 32);
3076	  regno += 1;
3077	  break;
3078
3079	case VOIDmode:
3080	  gcc_assert (GET_CODE (operands[i]) == CONST_INT);
3081	  /* FALLTHRU */
3082	case DImode:
3083	  reg = gen_rtx_REG (DImode, regno);
3084	  regno += 1;
3085	  break;
3086
3087	default:
3088	  gcc_unreachable ();
3089	}
3090
3091      emit_move_insn (reg, operands[i]);
3092      usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
3093    }
3094
3095  switch (GET_MODE (target))
3096    {
3097    case TFmode:
3098      reg = gen_rtx_REG (TFmode, 16);
3099      break;
3100    case DFmode:
3101      reg = gen_rtx_REG (DFmode, 32);
3102      break;
3103    case DImode:
3104      reg = gen_rtx_REG (DImode, 0);
3105      break;
3106    default:
3107      gcc_unreachable ();
3108    }
3109
3110  tmp = gen_rtx_MEM (QImode, func);
3111  tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3112					const0_rtx, const0_rtx));
3113  CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3114  CONST_OR_PURE_CALL_P (tmp) = 1;
3115
3116  tmp = get_insns ();
3117  end_sequence ();
3118
3119  emit_libcall_block (tmp, target, reg, equiv);
3120}
3121
3122/* Emit an X_floating library function call for arithmetic (+,-,*,/).  */
3123
3124void
3125alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3126{
3127  rtx func;
3128  int mode;
3129  rtx out_operands[3];
3130
3131  func = alpha_lookup_xfloating_lib_func (code);
3132  mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3133
3134  out_operands[0] = operands[1];
3135  out_operands[1] = operands[2];
3136  out_operands[2] = GEN_INT (mode);
3137  alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3138				gen_rtx_fmt_ee (code, TFmode, operands[1],
3139						operands[2]));
3140}
3141
3142/* Emit an X_floating library function call for a comparison.  */
3143
3144static rtx
3145alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3146{
3147  enum rtx_code cmp_code, res_code;
3148  rtx func, out, operands[2];
3149
3150  /* X_floating library comparison functions return
3151	   -1  unordered
3152	    0  false
3153	    1  true
3154     Convert the compare against the raw return value.  */
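  /* For example, an LE test calls the Leq routine and the caller then tests
     (result > 0), while UNORDERED calls the Eql routine and tests
     (result < 0).  */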
3155
3156  cmp_code = *pcode;
3157  switch (cmp_code)
3158    {
3159    case UNORDERED:
3160      cmp_code = EQ;
3161      res_code = LT;
3162      break;
3163    case ORDERED:
3164      cmp_code = EQ;
3165      res_code = GE;
3166      break;
3167    case NE:
3168      res_code = NE;
3169      break;
3170    case EQ:
3171    case LT:
3172    case GT:
3173    case LE:
3174    case GE:
3175      res_code = GT;
3176      break;
3177    default:
3178      gcc_unreachable ();
3179    }
3180  *pcode = res_code;
3181
3182  func = alpha_lookup_xfloating_lib_func (cmp_code);
3183
3184  operands[0] = op0;
3185  operands[1] = op1;
3186  out = gen_reg_rtx (DImode);
3187
3188  /* ??? Strange mode for equiv because what's actually returned
3189     is -1,0,1, not a proper boolean value.  */
3190  alpha_emit_xfloating_libcall (func, out, operands, 2,
3191				gen_rtx_fmt_ee (cmp_code, CCmode, op0, op1));
3192
3193  return out;
3194}
3195
3196/* Emit an X_floating library function call for a conversion.  */
3197
3198void
3199alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3200{
3201  int noperands = 1, mode;
3202  rtx out_operands[2];
3203  rtx func;
3204  enum rtx_code code = orig_code;
3205
3206  if (code == UNSIGNED_FIX)
3207    code = FIX;
3208
3209  func = alpha_lookup_xfloating_lib_func (code);
3210
3211  out_operands[0] = operands[1];
3212
3213  switch (code)
3214    {
3215    case FIX:
3216      mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3217      out_operands[1] = GEN_INT (mode);
3218      noperands = 2;
3219      break;
3220    case FLOAT_TRUNCATE:
3221      mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3222      out_operands[1] = GEN_INT (mode);
3223      noperands = 2;
3224      break;
3225    default:
3226      break;
3227    }
3228
3229  alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3230				gen_rtx_fmt_e (orig_code,
3231					       GET_MODE (operands[0]),
3232					       operands[1]));
3233}
3234
3235/* Split a TFmode OP[1] into DImode OP[2,3] and likewise for
3236   OP[0] into OP[0,1].  Naturally, output operand ordering is
3237   little-endian.  */
3238
3239void
3240alpha_split_tfmode_pair (rtx operands[4])
3241{
3242  switch (GET_CODE (operands[1]))
3243    {
3244    case REG:
3245      operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3246      operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3247      break;
3248
3249    case MEM:
3250      operands[3] = adjust_address (operands[1], DImode, 8);
3251      operands[2] = adjust_address (operands[1], DImode, 0);
3252      break;
3253
3254    case CONST_DOUBLE:
3255      gcc_assert (operands[1] == CONST0_RTX (TFmode));
3256      operands[2] = operands[3] = const0_rtx;
3257      break;
3258
3259    default:
3260      gcc_unreachable ();
3261    }
3262
3263  switch (GET_CODE (operands[0]))
3264    {
3265    case REG:
3266      operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3267      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3268      break;
3269
3270    case MEM:
3271      operands[1] = adjust_address (operands[0], DImode, 8);
3272      operands[0] = adjust_address (operands[0], DImode, 0);
3273      break;
3274
3275    default:
3276      gcc_unreachable ();
3277    }
3278}
3279
3280/* Implement negtf2 or abstf2.  Op0 is destination, op1 is source,
3281   op2 is a register containing the sign bit, operation is the
3282   logical operation to be performed.  */
3283
3284void
3285alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3286{
3287  rtx high_bit = operands[2];
3288  rtx scratch;
3289  int move;
3290
3291  alpha_split_tfmode_pair (operands);
3292
3293  /* Detect three flavors of operand overlap.  */
3294  move = 1;
3295  if (rtx_equal_p (operands[0], operands[2]))
3296    move = 0;
3297  else if (rtx_equal_p (operands[1], operands[2]))
3298    {
3299      if (rtx_equal_p (operands[0], high_bit))
3300	move = 2;
3301      else
3302	move = -1;
3303    }
3304
3305  if (move < 0)
3306    emit_move_insn (operands[0], operands[2]);
3307
3308  /* ??? If the destination overlaps both source tf and high_bit, then
3309     assume source tf is dead in its entirety and use the other half
3310     for a scratch register.  Otherwise "scratch" is just the proper
3311     destination register.  */
3312  scratch = operands[move < 2 ? 1 : 3];
3313
3314  emit_insn ((*operation) (scratch, high_bit, operands[3]));
3315
3316  if (move > 0)
3317    {
3318      emit_move_insn (operands[0], operands[2]);
3319      if (move > 1)
3320	emit_move_insn (operands[1], scratch);
3321    }
3322}
3323
3324/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3325   unaligned data:
3326
3327           unsigned:                       signed:
3328   word:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3329           ldq_u  r2,X+1(r11)              ldq_u  r2,X+1(r11)
3330           lda    r3,X(r11)                lda    r3,X+2(r11)
3331           extwl  r1,r3,r1                 extql  r1,r3,r1
3332           extwh  r2,r3,r2                 extqh  r2,r3,r2
3333           or     r1,r2,r1                 or     r1,r2,r1
3334                                           sra    r1,48,r1
3335
3336   long:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
3337           ldq_u  r2,X+3(r11)              ldq_u  r2,X+3(r11)
3338           lda    r3,X(r11)                lda    r3,X(r11)
3339           extll  r1,r3,r1                 extll  r1,r3,r1
3340           extlh  r2,r3,r2                 extlh  r2,r3,r2
3341           or     r1,r2,r1                 addl   r1,r2,r1
3342
3343   quad:   ldq_u  r1,X(r11)
3344           ldq_u  r2,X+7(r11)
3345           lda    r3,X(r11)
3346           extql  r1,r3,r1
3347           extqh  r2,r3,r2
3348           or     r1,r2,r1
3349*/
3350
3351void
3352alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3353			     HOST_WIDE_INT ofs, int sign)
3354{
3355  rtx meml, memh, addr, extl, exth, tmp, mema;
3356  enum machine_mode mode;
3357
3358  if (TARGET_BWX && size == 2)
3359    {
3360      meml = adjust_address (mem, QImode, ofs);
3361      memh = adjust_address (mem, QImode, ofs+1);
3362      if (BYTES_BIG_ENDIAN)
3363	tmp = meml, meml = memh, memh = tmp;
3364      extl = gen_reg_rtx (DImode);
3365      exth = gen_reg_rtx (DImode);
3366      emit_insn (gen_zero_extendqidi2 (extl, meml));
3367      emit_insn (gen_zero_extendqidi2 (exth, memh));
3368      exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3369				  NULL, 1, OPTAB_LIB_WIDEN);
3370      addr = expand_simple_binop (DImode, IOR, extl, exth,
3371				  NULL, 1, OPTAB_LIB_WIDEN);
3372
3373      if (sign && GET_MODE (tgt) != HImode)
3374	{
3375	  addr = gen_lowpart (HImode, addr);
3376	  emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3377	}
3378      else
3379	{
3380	  if (GET_MODE (tgt) != DImode)
3381	    addr = gen_lowpart (GET_MODE (tgt), addr);
3382	  emit_move_insn (tgt, addr);
3383	}
3384      return;
3385    }
3386
3387  meml = gen_reg_rtx (DImode);
3388  memh = gen_reg_rtx (DImode);
3389  addr = gen_reg_rtx (DImode);
3390  extl = gen_reg_rtx (DImode);
3391  exth = gen_reg_rtx (DImode);
3392
3393  mema = XEXP (mem, 0);
3394  if (GET_CODE (mema) == LO_SUM)
3395    mema = force_reg (Pmode, mema);
3396
3397  /* AND addresses cannot be in any alias set, since they may implicitly
3398     alias surrounding code.  Ideally we'd have some alias set that
3399     covered all types except those with alignment 8 or higher.  */
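  /* Sketch of the addressing below (assuming an 8-byte aligned base): a
     4-byte load at byte offset 5 reads the quadwords at (base + 5) & -8 and
     (base + 8) & -8, i.e. offsets 0 and 8 of the enclosing aligned block.  */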
3400
3401  tmp = change_address (mem, DImode,
3402			gen_rtx_AND (DImode,
3403				     plus_constant (mema, ofs),
3404				     GEN_INT (-8)));
3405  set_mem_alias_set (tmp, 0);
3406  emit_move_insn (meml, tmp);
3407
3408  tmp = change_address (mem, DImode,
3409			gen_rtx_AND (DImode,
3410				     plus_constant (mema, ofs + size - 1),
3411				     GEN_INT (-8)));
3412  set_mem_alias_set (tmp, 0);
3413  emit_move_insn (memh, tmp);
3414
3415  if (WORDS_BIG_ENDIAN && sign && (size == 2 || size == 4))
3416    {
3417      emit_move_insn (addr, plus_constant (mema, -1));
3418
3419      emit_insn (gen_extqh_be (extl, meml, addr));
3420      emit_insn (gen_extxl_be (exth, memh, GEN_INT (64), addr));
3421
3422      addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3423      addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (64 - size*8),
3424			   addr, 1, OPTAB_WIDEN);
3425    }
3426  else if (sign && size == 2)
3427    {
3428      emit_move_insn (addr, plus_constant (mema, ofs+2));
3429
3430      emit_insn (gen_extxl_le (extl, meml, GEN_INT (64), addr));
3431      emit_insn (gen_extqh_le (exth, memh, addr));
3432
3433      /* We must use tgt here for the target.  Alpha-vms port fails if we use
3434	 addr for the target, because addr is marked as a pointer and combine
3435	 knows that pointers are always sign-extended 32 bit values.  */
3436      addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3437      addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3438			   addr, 1, OPTAB_WIDEN);
3439    }
3440  else
3441    {
3442      if (WORDS_BIG_ENDIAN)
3443	{
3444	  emit_move_insn (addr, plus_constant (mema, ofs+size-1));
3445	  switch ((int) size)
3446	    {
3447	    case 2:
3448	      emit_insn (gen_extwh_be (extl, meml, addr));
3449	      mode = HImode;
3450	      break;
3451
3452	    case 4:
3453	      emit_insn (gen_extlh_be (extl, meml, addr));
3454	      mode = SImode;
3455	      break;
3456
3457	    case 8:
3458	      emit_insn (gen_extqh_be (extl, meml, addr));
3459	      mode = DImode;
3460	      break;
3461
3462	    default:
3463	      gcc_unreachable ();
3464	    }
3465	  emit_insn (gen_extxl_be (exth, memh, GEN_INT (size*8), addr));
3466	}
3467      else
3468	{
3469	  emit_move_insn (addr, plus_constant (mema, ofs));
3470	  emit_insn (gen_extxl_le (extl, meml, GEN_INT (size*8), addr));
3471	  switch ((int) size)
3472	    {
3473	    case 2:
3474	      emit_insn (gen_extwh_le (exth, memh, addr));
3475	      mode = HImode;
3476	      break;
3477
3478	    case 4:
3479	      emit_insn (gen_extlh_le (exth, memh, addr));
3480	      mode = SImode;
3481	      break;
3482
3483	    case 8:
3484	      emit_insn (gen_extqh_le (exth, memh, addr));
3485	      mode = DImode;
3486	      break;
3487
3488	    default:
3489	      gcc_unreachable ();
3490	    }
3491	}
3492
3493      addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3494			   gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3495			   sign, OPTAB_WIDEN);
3496    }
3497
3498  if (addr != tgt)
3499    emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3500}
3501
3502/* Similarly, use ins and msk instructions to perform unaligned stores.  */
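/* A rough outline of the little-endian path below: load the two quadwords
   that cover the destination, position the source bytes with ins*l/ins*h,
   clear the destination bytes with msk*l/msk*h, OR the pieces together, and
   store the high quadword before the low one so the aligned (degenerate)
   case still works.  */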
3503
3504void
3505alpha_expand_unaligned_store (rtx dst, rtx src,
3506			      HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3507{
3508  rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3509
3510  if (TARGET_BWX && size == 2)
3511    {
3512      if (src != const0_rtx)
3513	{
3514	  dstl = gen_lowpart (QImode, src);
3515	  dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3516				      NULL, 1, OPTAB_LIB_WIDEN);
3517	  dsth = gen_lowpart (QImode, dsth);
3518	}
3519      else
3520	dstl = dsth = const0_rtx;
3521
3522      meml = adjust_address (dst, QImode, ofs);
3523      memh = adjust_address (dst, QImode, ofs+1);
3524      if (BYTES_BIG_ENDIAN)
3525	addr = meml, meml = memh, memh = addr;
3526
3527      emit_move_insn (meml, dstl);
3528      emit_move_insn (memh, dsth);
3529      return;
3530    }
3531
3532  dstl = gen_reg_rtx (DImode);
3533  dsth = gen_reg_rtx (DImode);
3534  insl = gen_reg_rtx (DImode);
3535  insh = gen_reg_rtx (DImode);
3536
3537  dsta = XEXP (dst, 0);
3538  if (GET_CODE (dsta) == LO_SUM)
3539    dsta = force_reg (Pmode, dsta);
3540
3541  /* AND addresses cannot be in any alias set, since they may implicitly
3542     alias surrounding code.  Ideally we'd have some alias set that
3543     covered all types except those with alignment 8 or higher.  */
3544
3545  meml = change_address (dst, DImode,
3546			 gen_rtx_AND (DImode,
3547				      plus_constant (dsta, ofs),
3548				      GEN_INT (-8)));
3549  set_mem_alias_set (meml, 0);
3550
3551  memh = change_address (dst, DImode,
3552			 gen_rtx_AND (DImode,
3553				      plus_constant (dsta, ofs + size - 1),
3554				      GEN_INT (-8)));
3555  set_mem_alias_set (memh, 0);
3556
3557  emit_move_insn (dsth, memh);
3558  emit_move_insn (dstl, meml);
3559  if (WORDS_BIG_ENDIAN)
3560    {
3561      addr = copy_addr_to_reg (plus_constant (dsta, ofs+size-1));
3562
3563      if (src != const0_rtx)
3564	{
3565	  switch ((int) size)
3566	    {
3567	    case 2:
3568	      emit_insn (gen_inswl_be (insh, gen_lowpart (HImode,src), addr));
3569	      break;
3570	    case 4:
3571	      emit_insn (gen_insll_be (insh, gen_lowpart (SImode,src), addr));
3572	      break;
3573	    case 8:
3574	      emit_insn (gen_insql_be (insh, gen_lowpart (DImode,src), addr));
3575	      break;
3576	    }
3577	  emit_insn (gen_insxh (insl, gen_lowpart (DImode, src),
3578				GEN_INT (size*8), addr));
3579	}
3580
3581      switch ((int) size)
3582	{
3583	case 2:
3584	  emit_insn (gen_mskxl_be (dsth, dsth, GEN_INT (0xffff), addr));
3585	  break;
3586	case 4:
3587	  {
3588	    rtx msk = immed_double_const (0xffffffff, 0, DImode);
3589	    emit_insn (gen_mskxl_be (dsth, dsth, msk, addr));
3590	    break;
3591	  }
3592	case 8:
3593	  emit_insn (gen_mskxl_be (dsth, dsth, constm1_rtx, addr));
3594	  break;
3595	}
3596
3597      emit_insn (gen_mskxh (dstl, dstl, GEN_INT (size*8), addr));
3598    }
3599  else
3600    {
3601      addr = copy_addr_to_reg (plus_constant (dsta, ofs));
3602
3603      if (src != CONST0_RTX (GET_MODE (src)))
3604	{
3605	  emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3606				GEN_INT (size*8), addr));
3607
3608	  switch ((int) size)
3609	    {
3610	    case 2:
3611	      emit_insn (gen_inswl_le (insl, gen_lowpart (HImode, src), addr));
3612	      break;
3613	    case 4:
3614	      emit_insn (gen_insll_le (insl, gen_lowpart (SImode, src), addr));
3615	      break;
3616	    case 8:
3617	      emit_insn (gen_insql_le (insl, src, addr));
3618	      break;
3619	    }
3620	}
3621
3622      emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3623
3624      switch ((int) size)
3625	{
3626	case 2:
3627	  emit_insn (gen_mskxl_le (dstl, dstl, GEN_INT (0xffff), addr));
3628	  break;
3629	case 4:
3630	  {
3631	    rtx msk = immed_double_const (0xffffffff, 0, DImode);
3632	    emit_insn (gen_mskxl_le (dstl, dstl, msk, addr));
3633	    break;
3634	  }
3635	case 8:
3636	  emit_insn (gen_mskxl_le (dstl, dstl, constm1_rtx, addr));
3637	  break;
3638	}
3639    }
3640
3641  if (src != CONST0_RTX (GET_MODE (src)))
3642    {
3643      dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3644      dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3645    }
3646
3647  if (WORDS_BIG_ENDIAN)
3648    {
3649      emit_move_insn (meml, dstl);
3650      emit_move_insn (memh, dsth);
3651    }
3652  else
3653    {
3654      /* Must store high before low for degenerate case of aligned.  */
3655      emit_move_insn (memh, dsth);
3656      emit_move_insn (meml, dstl);
3657    }
3658}
3659
3660/* The block move code tries to maximize speed by separating loads and
3661   stores at the expense of register pressure: we load all of the data
3662   before we store it back out.  Two secondary effects are worth
3663   mentioning: it speeds up copies to and from both aligned and
3664   unaligned buffers, and it makes the code significantly easier to write.  */
3665
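/* With 8-byte words this caps the inline expansion of block moves and
   clears at 64 bytes; for larger blocks the expanders below return 0,
   which asks the caller to fall back, typically to a library call.  */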
3666#define MAX_MOVE_WORDS	8
3667
3668/* Load an integral number of consecutive unaligned quadwords.  */
3669
3670static void
3671alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3672				   HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3673{
3674  rtx const im8 = GEN_INT (-8);
3675  rtx const i64 = GEN_INT (64);
3676  rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3677  rtx sreg, areg, tmp, smema;
3678  HOST_WIDE_INT i;
3679
3680  smema = XEXP (smem, 0);
3681  if (GET_CODE (smema) == LO_SUM)
3682    smema = force_reg (Pmode, smema);
3683
3684  /* Generate all the tmp registers we need.  */
3685  for (i = 0; i < words; ++i)
3686    {
3687      data_regs[i] = out_regs[i];
3688      ext_tmps[i] = gen_reg_rtx (DImode);
3689    }
3690  data_regs[words] = gen_reg_rtx (DImode);
3691
3692  if (ofs != 0)
3693    smem = adjust_address (smem, GET_MODE (smem), ofs);
3694
3695  /* Load up all of the source data.  */
3696  for (i = 0; i < words; ++i)
3697    {
3698      tmp = change_address (smem, DImode,
3699			    gen_rtx_AND (DImode,
3700					 plus_constant (smema, 8*i),
3701					 im8));
3702      set_mem_alias_set (tmp, 0);
3703      emit_move_insn (data_regs[i], tmp);
3704    }
3705
3706  tmp = change_address (smem, DImode,
3707			gen_rtx_AND (DImode,
3708				     plus_constant (smema, 8*words - 1),
3709				     im8));
3710  set_mem_alias_set (tmp, 0);
3711  emit_move_insn (data_regs[words], tmp);
3712
3713  /* Extract the half-word fragments.  Unfortunately DEC decided to make
3714     extxh with offset zero a noop instead of zeroing the register, so
3715     we must take care of that edge condition ourselves with cmov.  */
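  /* On the little-endian layout each iteration of the loop below comes out
     roughly as (register names illustrative):

	extql	data[i], s, data[i]	# bytes of word i found in this quadword
	extqh	data[i+1], s, tmp[i]	# the remaining bytes from the next one
	cmoveq	a, $31, tmp[i]		# no leftover bytes if s was aligned

     and the two halves are OR'ed together in the merge loop that follows.  */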
3716
3717  sreg = copy_addr_to_reg (smema);
3718  areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3719		       1, OPTAB_WIDEN);
3720  if (WORDS_BIG_ENDIAN)
3721    emit_move_insn (sreg, plus_constant (sreg, 7));
3722  for (i = 0; i < words; ++i)
3723    {
3724      if (WORDS_BIG_ENDIAN)
3725	{
3726	  emit_insn (gen_extqh_be (data_regs[i], data_regs[i], sreg));
3727	  emit_insn (gen_extxl_be (ext_tmps[i], data_regs[i+1], i64, sreg));
3728	}
3729      else
3730	{
3731	  emit_insn (gen_extxl_le (data_regs[i], data_regs[i], i64, sreg));
3732	  emit_insn (gen_extqh_le (ext_tmps[i], data_regs[i+1], sreg));
3733	}
3734      emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3735			      gen_rtx_IF_THEN_ELSE (DImode,
3736						    gen_rtx_EQ (DImode, areg,
3737								const0_rtx),
3738						    const0_rtx, ext_tmps[i])));
3739    }
3740
3741  /* Merge the half-words into whole words.  */
3742  for (i = 0; i < words; ++i)
3743    {
3744      out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3745				  ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3746    }
3747}
3748
3749/* Store an integral number of consecutive unaligned quadwords.  DATA_REGS
3750   may be NULL to store zeros.  */
3751
3752static void
3753alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3754				    HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3755{
3756  rtx const im8 = GEN_INT (-8);
3757  rtx const i64 = GEN_INT (64);
3758  rtx ins_tmps[MAX_MOVE_WORDS];
3759  rtx st_tmp_1, st_tmp_2, dreg;
3760  rtx st_addr_1, st_addr_2, dmema;
3761  HOST_WIDE_INT i;
3762
3763  dmema = XEXP (dmem, 0);
3764  if (GET_CODE (dmema) == LO_SUM)
3765    dmema = force_reg (Pmode, dmema);
3766
3767  /* Generate all the tmp registers we need.  */
3768  if (data_regs != NULL)
3769    for (i = 0; i < words; ++i)
3770      ins_tmps[i] = gen_reg_rtx (DImode);
3771  st_tmp_1 = gen_reg_rtx (DImode);
3772  st_tmp_2 = gen_reg_rtx (DImode);
3773
3774  if (ofs != 0)
3775    dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3776
3777  st_addr_2 = change_address (dmem, DImode,
3778			      gen_rtx_AND (DImode,
3779					   plus_constant (dmema, words*8 - 1),
3780				       im8));
3781  set_mem_alias_set (st_addr_2, 0);
3782
3783  st_addr_1 = change_address (dmem, DImode,
3784			      gen_rtx_AND (DImode, dmema, im8));
3785  set_mem_alias_set (st_addr_1, 0);
3786
3787  /* Load up the destination end bits.  */
3788  emit_move_insn (st_tmp_2, st_addr_2);
3789  emit_move_insn (st_tmp_1, st_addr_1);
3790
3791  /* Shift the input data into place.  */
3792  dreg = copy_addr_to_reg (dmema);
3793  if (WORDS_BIG_ENDIAN)
3794    emit_move_insn (dreg, plus_constant (dreg, 7));
3795  if (data_regs != NULL)
3796    {
3797      for (i = words-1; i >= 0; --i)
3798	{
3799	  if (WORDS_BIG_ENDIAN)
3800	    {
3801	      emit_insn (gen_insql_be (ins_tmps[i], data_regs[i], dreg));
3802	      emit_insn (gen_insxh (data_regs[i], data_regs[i], i64, dreg));
3803	    }
3804	  else
3805	    {
3806	      emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
3807	      emit_insn (gen_insql_le (data_regs[i], data_regs[i], dreg));
3808	    }
3809	}
3810      for (i = words-1; i > 0; --i)
3811	{
3812	  ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3813					ins_tmps[i-1], ins_tmps[i-1], 1,
3814					OPTAB_WIDEN);
3815	}
3816    }
3817
3818  /* Split and merge the ends with the destination data.  */
3819  if (WORDS_BIG_ENDIAN)
3820    {
3821      emit_insn (gen_mskxl_be (st_tmp_2, st_tmp_2, constm1_rtx, dreg));
3822      emit_insn (gen_mskxh (st_tmp_1, st_tmp_1, i64, dreg));
3823    }
3824  else
3825    {
3826      emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
3827      emit_insn (gen_mskxl_le (st_tmp_1, st_tmp_1, constm1_rtx, dreg));
3828    }
3829
3830  if (data_regs != NULL)
3831    {
3832      st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3833			       st_tmp_2, 1, OPTAB_WIDEN);
3834      st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3835			       st_tmp_1, 1, OPTAB_WIDEN);
3836    }
3837
3838  /* Store it all.  */
3839  if (WORDS_BIG_ENDIAN)
3840    emit_move_insn (st_addr_1, st_tmp_1);
3841  else
3842    emit_move_insn (st_addr_2, st_tmp_2);
3843  for (i = words-1; i > 0; --i)
3844    {
3845      rtx tmp = change_address (dmem, DImode,
3846				gen_rtx_AND (DImode,
3847					     plus_constant (dmema,
3848					     WORDS_BIG_ENDIAN ? i*8-1 : i*8),
3849					     im8));
3850      set_mem_alias_set (tmp, 0);
3851      emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3852    }
3853  if (WORDS_BIG_ENDIAN)
3854    emit_move_insn (st_addr_2, st_tmp_2);
3855  else
3856    emit_move_insn (st_addr_1, st_tmp_1);
3857}
3858
3859
3860/* Expand string/block move operations.
3861
3862   operands[0] is the pointer to the destination.
3863   operands[1] is the pointer to the source.
3864   operands[2] is the number of bytes to move.
3865   operands[3] is the alignment.  */
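/* A nonzero return value means the move was expanded inline; a zero return
   asks the caller to fall back, typically to a library call.  */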
3866
3867int
3868alpha_expand_block_move (rtx operands[])
3869{
3870  rtx bytes_rtx	= operands[2];
3871  rtx align_rtx = operands[3];
3872  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3873  HOST_WIDE_INT bytes = orig_bytes;
3874  HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3875  HOST_WIDE_INT dst_align = src_align;
3876  rtx orig_src = operands[1];
3877  rtx orig_dst = operands[0];
3878  rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3879  rtx tmp;
3880  unsigned int i, words, ofs, nregs = 0;
3881
3882  if (orig_bytes <= 0)
3883    return 1;
3884  else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3885    return 0;
3886
3887  /* Look for additional alignment information from recorded register info.  */
3888
3889  tmp = XEXP (orig_src, 0);
3890  if (GET_CODE (tmp) == REG)
3891    src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3892  else if (GET_CODE (tmp) == PLUS
3893	   && GET_CODE (XEXP (tmp, 0)) == REG
3894	   && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3895    {
3896      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3897      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3898
3899      if (a > src_align)
3900	{
3901          if (a >= 64 && c % 8 == 0)
3902	    src_align = 64;
3903          else if (a >= 32 && c % 4 == 0)
3904	    src_align = 32;
3905          else if (a >= 16 && c % 2 == 0)
3906	    src_align = 16;
3907	}
3908    }
3909
3910  tmp = XEXP (orig_dst, 0);
3911  if (GET_CODE (tmp) == REG)
3912    dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3913  else if (GET_CODE (tmp) == PLUS
3914	   && GET_CODE (XEXP (tmp, 0)) == REG
3915	   && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3916    {
3917      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3918      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3919
3920      if (a > dst_align)
3921	{
3922          if (a >= 64 && c % 8 == 0)
3923	    dst_align = 64;
3924          else if (a >= 32 && c % 4 == 0)
3925	    dst_align = 32;
3926          else if (a >= 16 && c % 2 == 0)
3927	    dst_align = 16;
3928	}
3929    }
3930
3931  ofs = 0;
3932  if (src_align >= 64 && bytes >= 8)
3933    {
3934      words = bytes / 8;
3935
3936      for (i = 0; i < words; ++i)
3937	data_regs[nregs + i] = gen_reg_rtx (DImode);
3938
3939      for (i = 0; i < words; ++i)
3940	emit_move_insn (data_regs[nregs + i],
3941			adjust_address (orig_src, DImode, ofs + i * 8));
3942
3943      nregs += words;
3944      bytes -= words * 8;
3945      ofs += words * 8;
3946    }
3947
3948  if (src_align >= 32 && bytes >= 4)
3949    {
3950      words = bytes / 4;
3951
3952      for (i = 0; i < words; ++i)
3953	data_regs[nregs + i] = gen_reg_rtx (SImode);
3954
3955      for (i = 0; i < words; ++i)
3956	emit_move_insn (data_regs[nregs + i],
3957			adjust_address (orig_src, SImode, ofs + i * 4));
3958
3959      nregs += words;
3960      bytes -= words * 4;
3961      ofs += words * 4;
3962    }
3963
3964  if (bytes >= 8)
3965    {
3966      words = bytes / 8;
3967
3968      for (i = 0; i < words+1; ++i)
3969	data_regs[nregs + i] = gen_reg_rtx (DImode);
3970
3971      alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3972					 words, ofs);
3973
3974      nregs += words;
3975      bytes -= words * 8;
3976      ofs += words * 8;
3977    }
3978
3979  if (! TARGET_BWX && bytes >= 4)
3980    {
3981      data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3982      alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3983      bytes -= 4;
3984      ofs += 4;
3985    }
3986
3987  if (bytes >= 2)
3988    {
3989      if (src_align >= 16)
3990	{
3991	  do {
3992	    data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3993	    emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3994	    bytes -= 2;
3995	    ofs += 2;
3996	  } while (bytes >= 2);
3997	}
3998      else if (! TARGET_BWX)
3999	{
4000	  data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
4001	  alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
4002	  bytes -= 2;
4003	  ofs += 2;
4004	}
4005    }
4006
4007  while (bytes > 0)
4008    {
4009      data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
4010      emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
4011      bytes -= 1;
4012      ofs += 1;
4013    }
4014
4015  gcc_assert (nregs <= ARRAY_SIZE (data_regs));
4016
4017  /* Now save it back out again.  */
4018
4019  i = 0, ofs = 0;
4020
4021  /* Write out the data in whatever chunks reading the source allowed.  */
4022  if (dst_align >= 64)
4023    {
4024      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
4025	{
4026	  emit_move_insn (adjust_address (orig_dst, DImode, ofs),
4027			  data_regs[i]);
4028	  ofs += 8;
4029	  i++;
4030	}
4031    }
4032
4033  if (dst_align >= 32)
4034    {
4035      /* If the source has remaining DImode regs, write them out in
4036	 two pieces.  */
4037      while (i < nregs && GET_MODE (data_regs[i]) == DImode)
4038	{
4039	  tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
4040			      NULL_RTX, 1, OPTAB_WIDEN);
4041
4042	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
4043			  gen_lowpart (SImode, data_regs[i]));
4044	  emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
4045			  gen_lowpart (SImode, tmp));
4046	  ofs += 8;
4047	  i++;
4048	}
4049
4050      while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4051	{
4052	  emit_move_insn (adjust_address (orig_dst, SImode, ofs),
4053			  data_regs[i]);
4054	  ofs += 4;
4055	  i++;
4056	}
4057    }
4058
4059  if (i < nregs && GET_MODE (data_regs[i]) == DImode)
4060    {
4061      /* Write out a remaining block of words using unaligned methods.  */
4062
4063      for (words = 1; i + words < nregs; words++)
4064	if (GET_MODE (data_regs[i + words]) != DImode)
4065	  break;
4066
4067      if (words == 1)
4068	alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
4069      else
4070        alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
4071					    words, ofs);
4072
4073      i += words;
4074      ofs += words * 8;
4075    }
4076
4077  /* Due to the above, this won't be aligned.  */
4078  /* ??? If we have more than one of these, consider constructing full
4079     words in registers and using alpha_expand_unaligned_store_words.  */
4080  while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4081    {
4082      alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
4083      ofs += 4;
4084      i++;
4085    }
4086
4087  if (dst_align >= 16)
4088    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4089      {
4090	emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4091	i++;
4092	ofs += 2;
4093      }
4094  else
4095    while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4096      {
4097	alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4098	i++;
4099	ofs += 2;
4100      }
4101
4102  /* The remainder must be byte copies.  */
4103  while (i < nregs)
4104    {
4105      gcc_assert (GET_MODE (data_regs[i]) == QImode);
4106      emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4107      i++;
4108      ofs += 1;
4109    }
4110
4111  return 1;
4112}
4113
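/* Expand string/block clear operations.

   operands[0] is the pointer to the destination.
   operands[1] is the number of bytes to clear.
   operands[3] is the alignment.

   As with the block move above, a nonzero return value means the clear was
   expanded inline and a zero return asks the caller to fall back.  */
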
4114int
4115alpha_expand_block_clear (rtx operands[])
4116{
4117  rtx bytes_rtx	= operands[1];
4118  rtx align_rtx = operands[3];
4119  HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4120  HOST_WIDE_INT bytes = orig_bytes;
4121  HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4122  HOST_WIDE_INT alignofs = 0;
4123  rtx orig_dst = operands[0];
4124  rtx tmp;
4125  int i, words, ofs = 0;
4126
4127  if (orig_bytes <= 0)
4128    return 1;
4129  if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4130    return 0;
4131
4132  /* Look for stricter alignment.  */
4133  tmp = XEXP (orig_dst, 0);
4134  if (GET_CODE (tmp) == REG)
4135    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4136  else if (GET_CODE (tmp) == PLUS
4137	   && GET_CODE (XEXP (tmp, 0)) == REG
4138	   && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
4139    {
4140      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4141      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4142
4143      if (a > align)
4144	{
4145          if (a >= 64)
4146	    align = a, alignofs = 8 - c % 8;
4147          else if (a >= 32)
4148	    align = a, alignofs = 4 - c % 4;
4149          else if (a >= 16)
4150	    align = a, alignofs = 2 - c % 2;
4151	}
4152    }
4153
4154  /* Handle an unaligned prefix first.  */
4155
4156  if (alignofs > 0)
4157    {
4158#if HOST_BITS_PER_WIDE_INT >= 64
4159      /* Given that alignofs is bounded by align, the only time BWX could
4160	 generate three stores is for a 7 byte fill.  Prefer two individual
4161	 stores over a load/mask/store sequence.  */
4162      if ((!TARGET_BWX || alignofs == 7)
4163	  && align >= 32
4164	  && !(alignofs == 4 && bytes >= 4))
4165	{
4166	  enum machine_mode mode = (align >= 64 ? DImode : SImode);
4167	  int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4168	  rtx mem, tmp;
4169	  HOST_WIDE_INT mask;
4170
4171	  mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4172	  set_mem_alias_set (mem, 0);
4173
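	  /* For example, on the usual little-endian layout with
	     alignofs == 3 and bytes >= 3, inv_alignofs is 5 and the mask
	     below is 0x000000ffffffffff: the AND keeps the five bytes that
	     precede the region and zeroes the three bytes that begin it.  */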
4174	  mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
4175	  if (bytes < alignofs)
4176	    {
4177	      mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
4178	      ofs += bytes;
4179	      bytes = 0;
4180	    }
4181	  else
4182	    {
4183	      bytes -= alignofs;
4184	      ofs += alignofs;
4185	    }
4186	  alignofs = 0;
4187
4188	  tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4189			      NULL_RTX, 1, OPTAB_WIDEN);
4190
4191	  emit_move_insn (mem, tmp);
4192	}
4193#endif
4194
4195      if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4196	{
4197	  emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4198	  bytes -= 1;
4199	  ofs += 1;
4200	  alignofs -= 1;
4201	}
4202      if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4203	{
4204	  emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4205	  bytes -= 2;
4206	  ofs += 2;
4207	  alignofs -= 2;
4208	}
4209      if (alignofs == 4 && bytes >= 4)
4210	{
4211	  emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4212	  bytes -= 4;
4213	  ofs += 4;
4214	  alignofs = 0;
4215	}
4216
4217      /* If we've not used the extra lead alignment information by now,
4218	 we won't be able to.  Downgrade align to match what's left over.  */
4219      if (alignofs > 0)
4220	{
4221	  alignofs = alignofs & -alignofs;
4222	  align = MIN (align, alignofs * BITS_PER_UNIT);
4223	}
4224    }
4225
4226  /* Handle a block of contiguous long-words.  */
4227
4228  if (align >= 64 && bytes >= 8)
4229    {
4230      words = bytes / 8;
4231
4232      for (i = 0; i < words; ++i)
4233	emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4234			const0_rtx);
4235
4236      bytes -= words * 8;
4237      ofs += words * 8;
4238    }
4239
4240  /* If the block is large and appropriately aligned, emit a single
4241     store followed by a sequence of stq_u insns.  */
4242
4243  if (align >= 32 && bytes > 16)
4244    {
4245      rtx orig_dsta;
4246
4247      emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4248      bytes -= 4;
4249      ofs += 4;
4250
4251      orig_dsta = XEXP (orig_dst, 0);
4252      if (GET_CODE (orig_dsta) == LO_SUM)
4253	orig_dsta = force_reg (Pmode, orig_dsta);
4254
4255      words = bytes / 8;
4256      for (i = 0; i < words; ++i)
4257	{
4258	  rtx mem
4259	    = change_address (orig_dst, DImode,
4260			      gen_rtx_AND (DImode,
4261					   plus_constant (orig_dsta, ofs + i*8),
4262					   GEN_INT (-8)));
4263	  set_mem_alias_set (mem, 0);
4264	  emit_move_insn (mem, const0_rtx);
4265	}
4266
4267      /* Depending on the alignment, the first stq_u may have overlapped
4268	 with the initial stl, which means that the last stq_u didn't
4269	 write as much as it would appear.  Leave those questionable bytes
4270	 unaccounted for.  */
4271      bytes -= words * 8 - 4;
4272      ofs += words * 8 - 4;
4273    }
4274
4275  /* Handle a smaller block of aligned words.  */
4276
4277  if ((align >= 64 && bytes == 4)
4278      || (align == 32 && bytes >= 4))
4279    {
4280      words = bytes / 4;
4281
4282      for (i = 0; i < words; ++i)
4283	emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4284			const0_rtx);
4285
4286      bytes -= words * 4;
4287      ofs += words * 4;
4288    }
4289
4290  /* An unaligned block uses stq_u stores for as many as possible.  */
4291
4292  if (bytes >= 8)
4293    {
4294      words = bytes / 8;
4295
4296      alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4297
4298      bytes -= words * 8;
4299      ofs += words * 8;
4300    }
4301
4302  /* Next clean up any trailing pieces.  */
4303
4304#if HOST_BITS_PER_WIDE_INT >= 64
4305  /* Count the number of bits in BYTES for which aligned stores could
4306     be emitted.  */
4307  words = 0;
4308  for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4309    if (bytes & i)
4310      words += 1;
4311
4312  /* If we have appropriate alignment (and it wouldn't take too many
4313     instructions otherwise), mask out the bytes we need.  */
4314  if (TARGET_BWX ? words > 2 : bytes > 0)
4315    {
4316      if (align >= 64)
4317	{
4318	  rtx mem, tmp;
4319	  HOST_WIDE_INT mask;
4320
4321	  mem = adjust_address (orig_dst, DImode, ofs);
4322	  set_mem_alias_set (mem, 0);
4323
4324	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4325
4326	  tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4327			      NULL_RTX, 1, OPTAB_WIDEN);
4328
4329	  emit_move_insn (mem, tmp);
4330	  return 1;
4331	}
4332      else if (align >= 32 && bytes < 4)
4333	{
4334	  rtx mem, tmp;
4335	  HOST_WIDE_INT mask;
4336
4337	  mem = adjust_address (orig_dst, SImode, ofs);
4338	  set_mem_alias_set (mem, 0);
4339
4340	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4341
4342	  tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4343			      NULL_RTX, 1, OPTAB_WIDEN);
4344
4345	  emit_move_insn (mem, tmp);
4346	  return 1;
4347	}
4348    }
4349#endif
4350
4351  if (!TARGET_BWX && bytes >= 4)
4352    {
4353      alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4354      bytes -= 4;
4355      ofs += 4;
4356    }
4357
4358  if (bytes >= 2)
4359    {
4360      if (align >= 16)
4361	{
4362	  do {
4363	    emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4364			    const0_rtx);
4365	    bytes -= 2;
4366	    ofs += 2;
4367	  } while (bytes >= 2);
4368	}
4369      else if (! TARGET_BWX)
4370	{
4371	  alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4372	  bytes -= 2;
4373	  ofs += 2;
4374	}
4375    }
4376
4377  while (bytes > 0)
4378    {
4379      emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4380      bytes -= 1;
4381      ofs += 1;
4382    }
4383
4384  return 1;
4385}
4386
4387/* Returns a mask so that zap(x, value) == x & mask.  */
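/* For example, VALUE == 0x01 selects byte 0 for zapping, giving the mask
   0xffffffffffffff00, and VALUE == 0x0f gives 0xffffffff00000000; bytes
   whose select bit is clear keep 0xff in the mask.  */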
4388
4389rtx
4390alpha_expand_zap_mask (HOST_WIDE_INT value)
4391{
4392  rtx result;
4393  int i;
4394
4395  if (HOST_BITS_PER_WIDE_INT >= 64)
4396    {
4397      HOST_WIDE_INT mask = 0;
4398
4399      for (i = 7; i >= 0; --i)
4400	{
4401	  mask <<= 8;
4402	  if (!((value >> i) & 1))
4403	    mask |= 0xff;
4404	}
4405
4406      result = gen_int_mode (mask, DImode);
4407    }
4408  else
4409    {
4410      HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4411
4412      gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4413
4414      for (i = 7; i >= 4; --i)
4415	{
4416	  mask_hi <<= 8;
4417	  if (!((value >> i) & 1))
4418	    mask_hi |= 0xff;
4419	}
4420
4421      for (i = 3; i >= 0; --i)
4422	{
4423	  mask_lo <<= 8;
4424	  if (!((value >> i) & 1))
4425	    mask_lo |= 0xff;
4426	}
4427
4428      result = immed_double_const (mask_lo, mask_hi, DImode);
4429    }
4430
4431  return result;
4432}
4433
4434void
4435alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4436				   enum machine_mode mode,
4437				   rtx op0, rtx op1, rtx op2)
4438{
4439  op0 = gen_lowpart (mode, op0);
4440
4441  if (op1 == const0_rtx)
4442    op1 = CONST0_RTX (mode);
4443  else
4444    op1 = gen_lowpart (mode, op1);
4445
4446  if (op2 == const0_rtx)
4447    op2 = CONST0_RTX (mode);
4448  else
4449    op2 = gen_lowpart (mode, op2);
4450
4451  emit_insn ((*gen) (op0, op1, op2));
4452}
4453
4454/* A subroutine of the atomic operation splitters.  Jump to LABEL if
4455   COND is true.  Mark the jump as unlikely to be taken.  */
4456
4457static void
4458emit_unlikely_jump (rtx cond, rtx label)
4459{
4460  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
4461  rtx x;
4462
4463  x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4464  x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4465  REG_NOTES (x) = gen_rtx_EXPR_LIST (REG_BR_PROB, very_unlikely, NULL_RTX);
4466}
4467
4468/* A subroutine of the atomic operation splitters.  Emit a load-locked
4469   instruction in MODE.  */
4470
4471static void
4472emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4473{
4474  rtx (*fn) (rtx, rtx) = NULL;
4475  if (mode == SImode)
4476    fn = gen_load_locked_si;
4477  else if (mode == DImode)
4478    fn = gen_load_locked_di;
4479  emit_insn (fn (reg, mem));
4480}
4481
4482/* A subroutine of the atomic operation splitters.  Emit a store-conditional
4483   instruction in MODE.  */
4484
4485static void
4486emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4487{
4488  rtx (*fn) (rtx, rtx, rtx) = NULL;
4489  if (mode == SImode)
4490    fn = gen_store_conditional_si;
4491  else if (mode == DImode)
4492    fn = gen_store_conditional_di;
4493  emit_insn (fn (res, mem, val));
4494}
4495
4496/* A subroutine of the atomic operation splitters.  Emit an insxl
4497   instruction in MODE.  */
4498
4499static rtx
4500emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4501{
4502  rtx ret = gen_reg_rtx (DImode);
4503  rtx (*fn) (rtx, rtx, rtx);
4504
4505  if (WORDS_BIG_ENDIAN)
4506    {
4507      if (mode == QImode)
4508	fn = gen_insbl_be;
4509      else
4510	fn = gen_inswl_be;
4511    }
4512  else
4513    {
4514      if (mode == QImode)
4515	fn = gen_insbl_le;
4516      else
4517	fn = gen_inswl_le;
4518    }
4519  emit_insn (fn (ret, op1, op2));
4520
4521  return ret;
4522}
4523
4524/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
4525   to perform.  MEM is the memory on which to operate.  VAL is the second
4526   operand of the binary operator.  BEFORE and AFTER are optional locations to
4527   return the value of MEM either before or after the operation.  SCRATCH is
4528   a scratch register.  */
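/* The expansion is essentially the classic load-locked/store-conditional
   retry loop; for DImode it comes out roughly as (register names
   illustrative):

	mb
     1:	ldq_l	before, mem
	<op>	scratch, before, val
	stq_c	scratch, mem
	beq	scratch, 1b
	mb

   where <op> is the operation requested by CODE.  */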
4529
4530void
4531alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val,
4532		       rtx before, rtx after, rtx scratch)
4533{
4534  enum machine_mode mode = GET_MODE (mem);
4535  rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4536
4537  emit_insn (gen_memory_barrier ());
4538
4539  label = gen_label_rtx ();
4540  emit_label (label);
4541  label = gen_rtx_LABEL_REF (DImode, label);
4542
4543  if (before == NULL)
4544    before = scratch;
4545  emit_load_locked (mode, before, mem);
4546
4547  if (code == NOT)
4548    x = gen_rtx_AND (mode, gen_rtx_NOT (mode, before), val);
4549  else
4550    x = gen_rtx_fmt_ee (code, mode, before, val);
4551  if (after)
4552    emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4553  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4554
4555  emit_store_conditional (mode, cond, mem, scratch);
4556
4557  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4558  emit_unlikely_jump (x, label);
4559
4560  emit_insn (gen_memory_barrier ());
4561}
4562
4563/* Expand a compare and swap operation.  */
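/* For DImode and a nonzero OLDVAL this comes out roughly as (register
   names illustrative):

	mb
     1:	ldq_l	retval, mem
	cmpeq	retval, oldval, cond
	beq	cond, 2f
	mov	newval, scratch
	stq_c	scratch, mem
	beq	scratch, 1b
	mb
     2:

   with the zero-OLDVAL case branching on RETVAL directly.  */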
4564
4565void
4566alpha_split_compare_and_swap (rtx retval, rtx mem, rtx oldval, rtx newval,
4567			      rtx scratch)
4568{
4569  enum machine_mode mode = GET_MODE (mem);
4570  rtx label1, label2, x, cond = gen_lowpart (DImode, scratch);
4571
4572  emit_insn (gen_memory_barrier ());
4573
4574  label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4575  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4576  emit_label (XEXP (label1, 0));
4577
4578  emit_load_locked (mode, retval, mem);
4579
4580  x = gen_lowpart (DImode, retval);
4581  if (oldval == const0_rtx)
4582    x = gen_rtx_NE (DImode, x, const0_rtx);
4583  else
4584    {
4585      x = gen_rtx_EQ (DImode, x, oldval);
4586      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4587      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4588    }
4589  emit_unlikely_jump (x, label2);
4590
4591  emit_move_insn (scratch, newval);
4592  emit_store_conditional (mode, cond, mem, scratch);
4593
4594  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4595  emit_unlikely_jump (x, label1);
4596
4597  emit_insn (gen_memory_barrier ());
4598  emit_label (XEXP (label2, 0));
4599}
4600
4601void
4602alpha_expand_compare_and_swap_12 (rtx dst, rtx mem, rtx oldval, rtx newval)
4603{
4604  enum machine_mode mode = GET_MODE (mem);
4605  rtx addr, align, wdst;
4606  rtx (*fn5) (rtx, rtx, rtx, rtx, rtx);
4607
4608  addr = force_reg (DImode, XEXP (mem, 0));
4609  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4610			       NULL_RTX, 1, OPTAB_DIRECT);
4611
4612  oldval = convert_modes (DImode, mode, oldval, 1);
4613  newval = emit_insxl (mode, newval, addr);
4614
4615  wdst = gen_reg_rtx (DImode);
4616  if (mode == QImode)
4617    fn5 = gen_sync_compare_and_swapqi_1;
4618  else
4619    fn5 = gen_sync_compare_and_swaphi_1;
4620  emit_insn (fn5 (wdst, addr, oldval, newval, align));
4621
4622  emit_move_insn (dst, gen_lowpart (mode, wdst));
4623}
4624
4625void
4626alpha_split_compare_and_swap_12 (enum machine_mode mode, rtx dest, rtx addr,
4627				 rtx oldval, rtx newval, rtx align,
4628				 rtx scratch, rtx cond)
4629{
4630  rtx label1, label2, mem, width, mask, x;
4631
4632  mem = gen_rtx_MEM (DImode, align);
4633  MEM_VOLATILE_P (mem) = 1;
4634
4635  emit_insn (gen_memory_barrier ());
4636  label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4637  label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4638  emit_label (XEXP (label1, 0));
4639
4640  emit_load_locked (DImode, scratch, mem);
4641
4642  width = GEN_INT (GET_MODE_BITSIZE (mode));
4643  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4644  if (WORDS_BIG_ENDIAN)
4645    emit_insn (gen_extxl_be (dest, scratch, width, addr));
4646  else
4647    emit_insn (gen_extxl_le (dest, scratch, width, addr));
4648
4649  if (oldval == const0_rtx)
4650    x = gen_rtx_NE (DImode, dest, const0_rtx);
4651  else
4652    {
4653      x = gen_rtx_EQ (DImode, dest, oldval);
4654      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4655      x = gen_rtx_EQ (DImode, cond, const0_rtx);
4656    }
4657  emit_unlikely_jump (x, label2);
4658
4659  if (WORDS_BIG_ENDIAN)
4660    emit_insn (gen_mskxl_be (scratch, scratch, mask, addr));
4661  else
4662    emit_insn (gen_mskxl_le (scratch, scratch, mask, addr));
4663  emit_insn (gen_iordi3 (scratch, scratch, newval));
4664
4665  emit_store_conditional (DImode, scratch, mem, scratch);
4666
4667  x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4668  emit_unlikely_jump (x, label1);
4669
4670  emit_insn (gen_memory_barrier ());
4671  emit_label (XEXP (label2, 0));
4672}
4673
4674/* Expand an atomic exchange operation.  */
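/* Roughly, for DImode (register names illustrative):

	mb
     1:	ldq_l	retval, mem
	mov	val, scratch
	stq_c	scratch, mem
	beq	scratch, 1b

   Note that only the leading barrier is emitted here.  */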
4675
4676void
4677alpha_split_lock_test_and_set (rtx retval, rtx mem, rtx val, rtx scratch)
4678{
4679  enum machine_mode mode = GET_MODE (mem);
4680  rtx label, x, cond = gen_lowpart (DImode, scratch);
4681
4682  emit_insn (gen_memory_barrier ());
4683
4684  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4685  emit_label (XEXP (label, 0));
4686
4687  emit_load_locked (mode, retval, mem);
4688  emit_move_insn (scratch, val);
4689  emit_store_conditional (mode, cond, mem, scratch);
4690
4691  x = gen_rtx_EQ (DImode, cond, const0_rtx);
4692  emit_unlikely_jump (x, label);
4693}
4694
4695void
4696alpha_expand_lock_test_and_set_12 (rtx dst, rtx mem, rtx val)
4697{
4698  enum machine_mode mode = GET_MODE (mem);
4699  rtx addr, align, wdst;
4700  rtx (*fn4) (rtx, rtx, rtx, rtx);
4701
4702  /* Force the address into a register.  */
4703  addr = force_reg (DImode, XEXP (mem, 0));
4704
4705  /* Align it to a multiple of 8.  */
4706  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4707			       NULL_RTX, 1, OPTAB_DIRECT);
4708
4709  /* Insert val into the correct byte location within the word.  */
4710  val = emit_insxl (mode, val, addr);
4711
4712  wdst = gen_reg_rtx (DImode);
4713  if (mode == QImode)
4714    fn4 = gen_sync_lock_test_and_setqi_1;
4715  else
4716    fn4 = gen_sync_lock_test_and_sethi_1;
4717  emit_insn (fn4 (wdst, addr, val, align));
4718
4719  emit_move_insn (dst, gen_lowpart (mode, wdst));
4720}
4721
4722void
4723alpha_split_lock_test_and_set_12 (enum machine_mode mode, rtx dest, rtx addr,
4724				  rtx val, rtx align, rtx scratch)
4725{
4726  rtx label, mem, width, mask, x;
4727
4728  mem = gen_rtx_MEM (DImode, align);
4729  MEM_VOLATILE_P (mem) = 1;
4730
4731  emit_insn (gen_memory_barrier ());
4732  label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4733  emit_label (XEXP (label, 0));
4734
4735  emit_load_locked (DImode, scratch, mem);
4736
4737  width = GEN_INT (GET_MODE_BITSIZE (mode));
4738  mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4739  if (WORDS_BIG_ENDIAN)
4740    {
4741      emit_insn (gen_extxl_be (dest, scratch, width, addr));
4742      emit_insn (gen_mskxl_be (scratch, scratch, mask, addr));
4743    }
4744  else
4745    {
4746      emit_insn (gen_extxl_le (dest, scratch, width, addr));
4747      emit_insn (gen_mskxl_le (scratch, scratch, mask, addr));
4748    }
4749  emit_insn (gen_iordi3 (scratch, scratch, val));
4750
4751  emit_store_conditional (DImode, scratch, mem, scratch);
4752
4753  x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4754  emit_unlikely_jump (x, label);
4755}
4756
4757/* Adjust the cost of a scheduling dependency.  Return the new cost of
4758   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4759
4760static int
4761alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4762{
4763  enum attr_type insn_type, dep_insn_type;
4764
4765  /* If the dependence is an anti-dependence, there is no cost.  For an
4766     output dependence, there is sometimes a cost, but it doesn't seem
4767     worth handling those few cases.  */
4768  if (REG_NOTE_KIND (link) != 0)
4769    return cost;
4770
4771  /* If we can't recognize the insns, we can't really do anything.  */
4772  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4773    return cost;
4774
4775  insn_type = get_attr_type (insn);
4776  dep_insn_type = get_attr_type (dep_insn);
4777
4778  /* Bring in the user-defined memory latency.  */
4779  if (dep_insn_type == TYPE_ILD
4780      || dep_insn_type == TYPE_FLD
4781      || dep_insn_type == TYPE_LDSYM)
4782    cost += alpha_memory_latency-1;
4783
4784  /* Everything else handled in DFA bypasses now.  */
4785
4786  return cost;
4787}
4788
4789/* The number of instructions that can be issued per cycle.  */
4790
4791static int
4792alpha_issue_rate (void)
4793{
4794  return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4795}
4796
4797/* How many alternative schedules to try.  This should be as wide as the
4798   scheduling freedom in the DFA, but no wider.  Making this value too
4799   large results in extra work for the scheduler.
4800
4801   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4802   alternative schedules.  For EV5, we can choose between E0/E1 and
4803   FA/FM.  For EV6, an arithmetic insn can be issued to U0/U1/L0/L1.  */
4804
4805static int
4806alpha_multipass_dfa_lookahead (void)
4807{
4808  return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4809}
4810
4811/* Machine-specific function data.  */
4812
4813struct machine_function GTY(())
4814{
4815  /* For unicosmk.  */
4816  /* List of call information words for calls from this function.  */
4817  struct rtx_def *first_ciw;
4818  struct rtx_def *last_ciw;
4819  int ciw_count;
4820
4821  /* List of deferred case vectors.  */
4822  struct rtx_def *addr_list;
4823
4824  /* For OSF.  */
4825  const char *some_ld_name;
4826
4827  /* For TARGET_LD_BUGGY_LDGP.  */
4828  struct rtx_def *gp_save_rtx;
4829};
4830
4831/* How to allocate a 'struct machine_function'.  */
4832
4833static struct machine_function *
4834alpha_init_machine_status (void)
4835{
4836  return ((struct machine_function *)
4837		ggc_alloc_cleared (sizeof (struct machine_function)));
4838}
4839
4840/* Functions to save and restore alpha_return_addr_rtx.  */
4841
4842/* Start the ball rolling with RETURN_ADDR_RTX.  */
4843
4844rtx
4845alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4846{
4847  if (count != 0)
4848    return const0_rtx;
4849
4850  return get_hard_reg_initial_val (Pmode, REG_RA);
4851}
4852
4853/* Return or create a memory slot containing the gp value for the current
4854   function.  Needed only if TARGET_LD_BUGGY_LDGP.  */
4855
4856rtx
4857alpha_gp_save_rtx (void)
4858{
4859  rtx seq, m = cfun->machine->gp_save_rtx;
4860
4861  if (m == NULL)
4862    {
4863      start_sequence ();
4864
4865      m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4866      m = validize_mem (m);
4867      emit_move_insn (m, pic_offset_table_rtx);
4868
4869      seq = get_insns ();
4870      end_sequence ();
4871      emit_insn_after (seq, entry_of_function ());
4872
4873      cfun->machine->gp_save_rtx = m;
4874    }
4875
4876  return m;
4877}
4878
4879static int
4880alpha_ra_ever_killed (void)
4881{
4882  rtx top;
4883
4884  if (!has_hard_reg_initial_val (Pmode, REG_RA))
4885    return regs_ever_live[REG_RA];
4886
4887  push_topmost_sequence ();
4888  top = get_insns ();
4889  pop_topmost_sequence ();
4890
4891  return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
4892}
4893
4894
4895/* Return the trap mode suffix applicable to the current
4896   instruction, or NULL.  */
4897
4898static const char *
4899get_trap_mode_suffix (void)
4900{
4901  enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4902
4903  switch (s)
4904    {
4905    case TRAP_SUFFIX_NONE:
4906      return NULL;
4907
4908    case TRAP_SUFFIX_SU:
4909      if (alpha_fptm >= ALPHA_FPTM_SU)
4910	return "su";
4911      return NULL;
4912
4913    case TRAP_SUFFIX_SUI:
4914      if (alpha_fptm >= ALPHA_FPTM_SUI)
4915	return "sui";
4916      return NULL;
4917
4918    case TRAP_SUFFIX_V_SV:
4919      switch (alpha_fptm)
4920	{
4921	case ALPHA_FPTM_N:
4922	  return NULL;
4923	case ALPHA_FPTM_U:
4924	  return "v";
4925	case ALPHA_FPTM_SU:
4926	case ALPHA_FPTM_SUI:
4927	  return "sv";
4928	default:
4929	  gcc_unreachable ();
4930	}
4931
4932    case TRAP_SUFFIX_V_SV_SVI:
4933      switch (alpha_fptm)
4934	{
4935	case ALPHA_FPTM_N:
4936	  return NULL;
4937	case ALPHA_FPTM_U:
4938	  return "v";
4939	case ALPHA_FPTM_SU:
4940	  return "sv";
4941	case ALPHA_FPTM_SUI:
4942	  return "svi";
4943	default:
4944	  gcc_unreachable ();
4945	}
4946      break;
4947
4948    case TRAP_SUFFIX_U_SU_SUI:
4949      switch (alpha_fptm)
4950	{
4951	case ALPHA_FPTM_N:
4952	  return NULL;
4953	case ALPHA_FPTM_U:
4954	  return "u";
4955	case ALPHA_FPTM_SU:
4956	  return "su";
4957	case ALPHA_FPTM_SUI:
4958	  return "sui";
4959	default:
4960	  gcc_unreachable ();
4961	}
4962      break;
4963
4964    default:
4965      gcc_unreachable ();
4966    }
4967  gcc_unreachable ();
4968}
4969
4970/* Return the rounding mode suffix applicable to the current
4971   instruction, or NULL.  */
4972
4973static const char *
4974get_round_mode_suffix (void)
4975{
4976  enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
4977
4978  switch (s)
4979    {
4980    case ROUND_SUFFIX_NONE:
4981      return NULL;
4982    case ROUND_SUFFIX_NORMAL:
4983      switch (alpha_fprm)
4984	{
4985	case ALPHA_FPRM_NORM:
4986	  return NULL;
4987	case ALPHA_FPRM_MINF:
4988	  return "m";
4989	case ALPHA_FPRM_CHOP:
4990	  return "c";
4991	case ALPHA_FPRM_DYN:
4992	  return "d";
4993	default:
4994	  gcc_unreachable ();
4995	}
4996      break;
4997
4998    case ROUND_SUFFIX_C:
4999      return "c";
5000
5001    default:
5002      gcc_unreachable ();
5003    }
5004  gcc_unreachable ();
5005}
5006
5007/* Locate some local-dynamic symbol still in use by this function
5008   so that we can print its name in some movdi_er_tlsldm pattern.  */
5009
5010static int
5011get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5012{
5013  rtx x = *px;
5014
5015  if (GET_CODE (x) == SYMBOL_REF
5016      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
5017    {
5018      cfun->machine->some_ld_name = XSTR (x, 0);
5019      return 1;
5020    }
5021
5022  return 0;
5023}
5024
5025static const char *
5026get_some_local_dynamic_name (void)
5027{
5028  rtx insn;
5029
5030  if (cfun->machine->some_ld_name)
5031    return cfun->machine->some_ld_name;
5032
5033  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5034    if (INSN_P (insn)
5035	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5036      return cfun->machine->some_ld_name;
5037
5038  gcc_unreachable ();
5039}
5040
5041/* Print an operand.  Recognize special options, documented below.  */
5042
5043void
5044print_operand (FILE *file, rtx x, int code)
5045{
5046  int i;
5047
5048  switch (code)
5049    {
5050    case '~':
5051      /* Print the assembler name of the current function.  */
5052      assemble_name (file, alpha_fnname);
5053      break;
5054
5055    case '&':
5056      assemble_name (file, get_some_local_dynamic_name ());
5057      break;
5058
5059    case '/':
5060      {
5061	const char *trap = get_trap_mode_suffix ();
5062	const char *round = get_round_mode_suffix ();
5063
5064	if (trap || round)
5065	  fprintf (file, (TARGET_AS_SLASH_BEFORE_SUFFIX ? "/%s%s" : "%s%s"),
5066		   (trap ? trap : ""), (round ? round : ""));
5067	break;
5068      }
5069
5070    case ',':
5071      /* Generates single precision instruction suffix.  */
5072      fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5073      break;
5074
5075    case '-':
5076      /* Generates double precision instruction suffix.  */
5077      fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5078      break;
5079
5080    case '+':
5081      /* Generates a nop after a noreturn call at the very end of the
5082	 function.  */
5083      if (next_real_insn (current_output_insn) == 0)
5084	fprintf (file, "\n\tnop");
5085      break;
5086
5087    case '#':
5088      if (alpha_this_literal_sequence_number == 0)
5089	alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5090      fprintf (file, "%d", alpha_this_literal_sequence_number);
5091      break;
5092
5093    case '*':
5094      if (alpha_this_gpdisp_sequence_number == 0)
5095	alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5096      fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5097      break;
5098
5099    case 'H':
5100      if (GET_CODE (x) == HIGH)
5101	output_addr_const (file, XEXP (x, 0));
5102      else
5103	output_operand_lossage ("invalid %%H value");
5104      break;
5105
5106    case 'J':
5107      {
5108	const char *lituse;
5109
5110        if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5111	  {
5112	    x = XVECEXP (x, 0, 0);
5113	    lituse = "lituse_tlsgd";
5114	  }
5115	else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5116	  {
5117	    x = XVECEXP (x, 0, 0);
5118	    lituse = "lituse_tlsldm";
5119	  }
5120	else if (GET_CODE (x) == CONST_INT)
5121	  lituse = "lituse_jsr";
5122	else
5123	  {
5124	    output_operand_lossage ("invalid %%J value");
5125	    break;
5126	  }
5127
5128	if (x != const0_rtx)
5129	  fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5130      }
5131      break;
5132
5133    case 'j':
5134      {
5135	const char *lituse;
5136
5137#ifdef HAVE_AS_JSRDIRECT_RELOCS
5138	lituse = "lituse_jsrdirect";
5139#else
5140	lituse = "lituse_jsr";
5141#endif
5142
5143	gcc_assert (INTVAL (x) != 0);
5144	fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5145      }
5146      break;
5147    case 'r':
5148      /* If this operand is the constant zero, write it as "$31".  */
5149      if (GET_CODE (x) == REG)
5150	fprintf (file, "%s", reg_names[REGNO (x)]);
5151      else if (x == CONST0_RTX (GET_MODE (x)))
5152	fprintf (file, "$31");
5153      else
5154	output_operand_lossage ("invalid %%r value");
5155      break;
5156
5157    case 'R':
5158      /* Similar, but for floating-point.  */
5159      if (GET_CODE (x) == REG)
5160	fprintf (file, "%s", reg_names[REGNO (x)]);
5161      else if (x == CONST0_RTX (GET_MODE (x)))
5162	fprintf (file, "$f31");
5163      else
5164	output_operand_lossage ("invalid %%R value");
5165      break;
5166
5167    case 'N':
5168      /* Write the 1's complement of a constant.  */
5169      if (GET_CODE (x) != CONST_INT)
5170	output_operand_lossage ("invalid %%N value");
5171
5172      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5173      break;
5174
5175    case 'P':
5176      /* Write 1 << C, for a constant C.  */
5177      if (GET_CODE (x) != CONST_INT)
5178	output_operand_lossage ("invalid %%P value");
5179
5180      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5181      break;
5182
5183    case 'h':
5184      /* Write the high-order 16 bits of a constant, sign-extended.  */
5185      if (GET_CODE (x) != CONST_INT)
5186	output_operand_lossage ("invalid %%h value");
5187
5188      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5189      break;
5190
5191    case 'L':
5192      /* Write the low-order 16 bits of a constant, sign-extended.  */
5193      if (GET_CODE (x) != CONST_INT)
5194	output_operand_lossage ("invalid %%L value");
5195
5196      fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5197	       (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5198      break;
5199
5200    case 'm':
5201      /* Write mask for ZAP insn.  */
5202      if (GET_CODE (x) == CONST_DOUBLE)
5203	{
5204	  HOST_WIDE_INT mask = 0;
5205	  HOST_WIDE_INT value;
5206
5207	  value = CONST_DOUBLE_LOW (x);
5208	  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5209	       i++, value >>= 8)
5210	    if (value & 0xff)
5211	      mask |= (1 << i);
5212
5213	  value = CONST_DOUBLE_HIGH (x);
5214	  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5215	       i++, value >>= 8)
5216	    if (value & 0xff)
5217	      mask |= (1 << (i + sizeof (int)));
5218
5219	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5220	}
5221
5222      else if (GET_CODE (x) == CONST_INT)
5223	{
5224	  HOST_WIDE_INT mask = 0, value = INTVAL (x);
5225
5226	  for (i = 0; i < 8; i++, value >>= 8)
5227	    if (value & 0xff)
5228	      mask |= (1 << i);
5229
5230	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5231	}
5232      else
5233	output_operand_lossage ("invalid %%m value");
5234      break;
5235
5236    case 'M':
5237      /* Write 'b', 'w', 'l', or 'q' according to the value of the constant.  */
5238      if (GET_CODE (x) != CONST_INT
5239	  || (INTVAL (x) != 8 && INTVAL (x) != 16
5240	      && INTVAL (x) != 32 && INTVAL (x) != 64))
5241	output_operand_lossage ("invalid %%M value");
5242
5243      fprintf (file, "%s",
5244	       (INTVAL (x) == 8 ? "b"
5245		: INTVAL (x) == 16 ? "w"
5246		: INTVAL (x) == 32 ? "l"
5247		: "q"));
5248      break;
5249
5250    case 'U':
5251      /* Similar, except do it from the mask.  */
5252      if (GET_CODE (x) == CONST_INT)
5253	{
5254	  HOST_WIDE_INT value = INTVAL (x);
5255
5256	  if (value == 0xff)
5257	    {
5258	      fputc ('b', file);
5259	      break;
5260	    }
5261	  if (value == 0xffff)
5262	    {
5263	      fputc ('w', file);
5264	      break;
5265	    }
5266	  if (value == 0xffffffff)
5267	    {
5268	      fputc ('l', file);
5269	      break;
5270	    }
5271	  if (value == -1)
5272	    {
5273	      fputc ('q', file);
5274	      break;
5275	    }
5276	}
5277      else if (HOST_BITS_PER_WIDE_INT == 32
5278	       && GET_CODE (x) == CONST_DOUBLE
5279	       && CONST_DOUBLE_LOW (x) == 0xffffffff
5280	       && CONST_DOUBLE_HIGH (x) == 0)
5281	{
5282	  fputc ('l', file);
5283	  break;
5284	}
5285      output_operand_lossage ("invalid %%U value");
5286      break;
5287
5288    case 's':
5289      /* Write the constant value divided by 8 for little-endian mode or
5290	 (56 - value) / 8 for big-endian mode.  */
5291
5292      if (GET_CODE (x) != CONST_INT
5293	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= (WORDS_BIG_ENDIAN
5294						     ? 56
5295						     : 64)
5296	  || (INTVAL (x) & 7) != 0)
5297	output_operand_lossage ("invalid %%s value");
5298
5299      fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5300	       WORDS_BIG_ENDIAN
5301	       ? (56 - INTVAL (x)) / 8
5302	       : INTVAL (x) / 8);
5303      break;
5304
5305    case 'S':
5306      /* Same, except compute (64 - c) / 8.  */
5307
5308      if (GET_CODE (x) != CONST_INT
5309	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5310	  || (INTVAL (x) & 7) != 0)
5311	output_operand_lossage ("invalid %%S value");
5312
5313      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5314      break;
5315
5316    case 't':
5317      {
5318        /* On Unicos/Mk systems: use a DEX expression if the symbol
5319	   clashes with a register name.  */
5320	int dex = unicosmk_need_dex (x);
5321	if (dex)
5322	  fprintf (file, "DEX(%d)", dex);
5323	else
5324	  output_addr_const (file, x);
5325      }
5326      break;
5327
5328    case 'C': case 'D': case 'c': case 'd':
5329      /* Write out comparison name.  */
5330      {
5331	enum rtx_code c = GET_CODE (x);
5332
5333        if (!COMPARISON_P (x))
5334	  output_operand_lossage ("invalid %%C value");
5335
5336	else if (code == 'D')
5337	  c = reverse_condition (c);
5338	else if (code == 'c')
5339	  c = swap_condition (c);
5340	else if (code == 'd')
5341	  c = swap_condition (reverse_condition (c));
5342
5343        if (c == LEU)
5344	  fprintf (file, "ule");
5345        else if (c == LTU)
5346	  fprintf (file, "ult");
5347	else if (c == UNORDERED)
5348	  fprintf (file, "un");
5349        else
5350	  fprintf (file, "%s", GET_RTX_NAME (c));
5351      }
5352      break;
5353
5354    case 'E':
5355      /* Write the divide or modulus operator.  */
5356      switch (GET_CODE (x))
5357	{
5358	case DIV:
5359	  fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5360	  break;
5361	case UDIV:
5362	  fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5363	  break;
5364	case MOD:
5365	  fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5366	  break;
5367	case UMOD:
5368	  fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5369	  break;
5370	default:
5371	  output_operand_lossage ("invalid %%E value");
5372	  break;
5373	}
5374      break;
5375
5376    case 'A':
5377      /* Write "_u" for unaligned access.  */
5378      if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
5379	fprintf (file, "_u");
5380      break;
5381
5382    case 0:
5383      if (GET_CODE (x) == REG)
5384	fprintf (file, "%s", reg_names[REGNO (x)]);
5385      else if (GET_CODE (x) == MEM)
5386	output_address (XEXP (x, 0));
5387      else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5388	{
5389	  switch (XINT (XEXP (x, 0), 1))
5390	    {
5391	    case UNSPEC_DTPREL:
5392	    case UNSPEC_TPREL:
5393	      output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5394	      break;
5395	    default:
5396	      output_operand_lossage ("unknown relocation unspec");
5397	      break;
5398	    }
5399	}
5400      else
5401	output_addr_const (file, x);
5402      break;
5403
5404    default:
5405      output_operand_lossage ("invalid %%xn code");
5406    }
5407}
5408
5409void
5410print_operand_address (FILE *file, rtx addr)
5411{
5412  int basereg = 31;
5413  HOST_WIDE_INT offset = 0;
5414
5415  if (GET_CODE (addr) == AND)
5416    addr = XEXP (addr, 0);
5417
5418  if (GET_CODE (addr) == PLUS
5419      && GET_CODE (XEXP (addr, 1)) == CONST_INT)
5420    {
5421      offset = INTVAL (XEXP (addr, 1));
5422      addr = XEXP (addr, 0);
5423    }
5424
5425  if (GET_CODE (addr) == LO_SUM)
5426    {
5427      const char *reloc16, *reloclo;
5428      rtx op1 = XEXP (addr, 1);
5429
5430      if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5431	{
5432	  op1 = XEXP (op1, 0);
5433	  switch (XINT (op1, 1))
5434	    {
5435	    case UNSPEC_DTPREL:
5436	      reloc16 = NULL;
5437	      reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5438	      break;
5439	    case UNSPEC_TPREL:
5440	      reloc16 = NULL;
5441	      reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5442	      break;
5443	    default:
5444	      output_operand_lossage ("unknown relocation unspec");
5445	      return;
5446	    }
5447
5448	  output_addr_const (file, XVECEXP (op1, 0, 0));
5449	}
5450      else
5451	{
5452	  reloc16 = "gprel";
5453	  reloclo = "gprellow";
5454	  output_addr_const (file, op1);
5455	}
5456
5457      if (offset)
5458	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5459
5460      addr = XEXP (addr, 0);
5461      switch (GET_CODE (addr))
5462	{
5463	case REG:
5464	  basereg = REGNO (addr);
5465	  break;
5466
5467	case SUBREG:
5468	  basereg = subreg_regno (addr);
5469	  break;
5470
5471	default:
5472	  gcc_unreachable ();
5473	}
5474
5475      fprintf (file, "($%d)\t\t!%s", basereg,
5476	       (basereg == 29 ? reloc16 : reloclo));
5477      return;
5478    }
5479
5480  switch (GET_CODE (addr))
5481    {
5482    case REG:
5483      basereg = REGNO (addr);
5484      break;
5485
5486    case SUBREG:
5487      basereg = subreg_regno (addr);
5488      break;
5489
5490    case CONST_INT:
5491      offset = INTVAL (addr);
5492      break;
5493
5494#if TARGET_ABI_OPEN_VMS
5495    case SYMBOL_REF:
5496      fprintf (file, "%s", XSTR (addr, 0));
5497      return;
5498
5499    case CONST:
5500      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5501		  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5502      fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5503	       XSTR (XEXP (XEXP (addr, 0), 0), 0),
5504	       INTVAL (XEXP (XEXP (addr, 0), 1)));
5505      return;
5506
5507#endif
5508    default:
5509      gcc_unreachable ();
5510    }
5511
5512  fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5513}
5514
5515/* Emit RTL insns to initialize the variable parts of a trampoline at
5516   TRAMP. FNADDR is an RTX for the address of the function's pure
5517   code.  CXT is an RTX for the static chain value for the function.
5518
5519   The three offset parameters are for the individual template's
5520   layout.  A JMPOFS < 0 indicates that the trampoline does not
5521   contain instructions at all.
5522
5523   We assume here that a function will be called many more times than
5524   its address is taken (e.g., it might be passed to qsort), so we
5525   take the trouble to initialize the "hint" field in the JMP insn.
5526   Note that the hint field is PC (new) + 4 * bits 13:0.  */
5527
5528void
5529alpha_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt,
5530			     int fnofs, int cxtofs, int jmpofs)
5531{
5532  rtx temp, temp1, addr;
5533  /* VMS really uses DImode pointers in memory at this point.  */
5534  enum machine_mode mode = TARGET_ABI_OPEN_VMS ? Pmode : ptr_mode;
5535
5536#ifdef POINTERS_EXTEND_UNSIGNED
5537  fnaddr = convert_memory_address (mode, fnaddr);
5538  cxt = convert_memory_address (mode, cxt);
5539#endif
5540
5541  /* Store function address and CXT.  */
5542  addr = memory_address (mode, plus_constant (tramp, fnofs));
5543  emit_move_insn (gen_rtx_MEM (mode, addr), fnaddr);
5544  addr = memory_address (mode, plus_constant (tramp, cxtofs));
5545  emit_move_insn (gen_rtx_MEM (mode, addr), cxt);
5546
5547  /* This has been disabled since the hint only has a 32k range, and in
5548     no existing OS is the stack within 32k of the text segment.  */
5549  if (0 && jmpofs >= 0)
5550    {
5551      /* Compute hint value.  */
5552      temp = force_operand (plus_constant (tramp, jmpofs+4), NULL_RTX);
5553      temp = expand_binop (DImode, sub_optab, fnaddr, temp, temp, 1,
5554			   OPTAB_WIDEN);
5555      temp = expand_shift (RSHIFT_EXPR, Pmode, temp,
5556		           build_int_cst (NULL_TREE, 2), NULL_RTX, 1);
5557      temp = expand_and (SImode, gen_lowpart (SImode, temp),
5558			 GEN_INT (0x3fff), 0);
5559
5560      /* Merge in the hint.  */
5561      addr = memory_address (SImode, plus_constant (tramp, jmpofs));
5562      temp1 = force_reg (SImode, gen_rtx_MEM (SImode, addr));
5563      temp1 = expand_and (SImode, temp1, GEN_INT (0xffffc000), NULL_RTX);
5564      temp1 = expand_binop (SImode, ior_optab, temp1, temp, temp1, 1,
5565			    OPTAB_WIDEN);
5566      emit_move_insn (gen_rtx_MEM (SImode, addr), temp1);
5567    }
5568
5569#ifdef ENABLE_EXECUTE_STACK
5570  emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5571		     0, VOIDmode, 1, tramp, Pmode);
5572#endif
5573
5574  if (jmpofs >= 0)
5575    emit_insn (gen_imb ());
5576}
5577
5578/* Determine where to put an argument to a function.
5579   Value is zero to push the argument on the stack,
5580   or a hard register in which to store the argument.
5581
5582   MODE is the argument's machine mode.
5583   TYPE is the data type of the argument (as a tree).
5584    This is null for libcalls where that information may
5585    not be available.
5586   CUM is a variable of type CUMULATIVE_ARGS which gives info about
5587    the preceding args and about the function being called.
5588   NAMED is nonzero if this argument is a named parameter
5589    (otherwise it is an extra parameter matching an ellipsis).
5590
5591   On Alpha the first 6 words of args are normally in registers
5592   and the rest are pushed.  */
5593
5594rtx
5595function_arg (CUMULATIVE_ARGS cum, enum machine_mode mode, tree type,
5596	      int named ATTRIBUTE_UNUSED)
5597{
5598  int basereg;
5599  int num_args;
5600
5601  /* Don't get confused and pass small structures in FP registers.  */
5602  if (type && AGGREGATE_TYPE_P (type))
5603    basereg = 16;
5604  else
5605    {
5606#ifdef ENABLE_CHECKING
5607      /* With alpha_split_complex_arg, we shouldn't see any raw complex
5608	 values here.  */
5609      gcc_assert (!COMPLEX_MODE_P (mode));
5610#endif
5611
5612      /* Set up defaults for FP operands passed in FP registers, and
5613	 integral operands passed in integer registers.  */
5614      if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5615	basereg = 32 + 16;
5616      else
5617	basereg = 16;
5618    }
5619
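  /* At this point BASEREG is 16 (integer argument register $16) for
     integer and aggregate arguments, or 48 (floating-point register $f16)
     for floating-point arguments.  The ABI-specific code below computes
     NUM_ARGS, the number of argument words already used, and the argument
     normally ends up in hard register BASEREG + NUM_ARGS.  */
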
5620  /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5621     the three platforms, so we can't avoid conditional compilation.  */
5622#if TARGET_ABI_OPEN_VMS
5623    {
5624      if (mode == VOIDmode)
5625	return alpha_arg_info_reg_val (cum);
5626
5627      num_args = cum.num_args;
5628      if (num_args >= 6
5629	  || targetm.calls.must_pass_in_stack (mode, type))
5630	return NULL_RTX;
5631    }
5632#elif TARGET_ABI_UNICOSMK
5633    {
5634      int size;
5635
5636      /* If this is the last argument, generate the call info word (CIW).  */
5637      /* ??? We don't include the caller's line number in the CIW because
5638	 I don't know how to determine it if debug info is turned off.  */
5639      if (mode == VOIDmode)
5640	{
5641	  int i;
5642	  HOST_WIDE_INT lo;
5643	  HOST_WIDE_INT hi;
5644	  rtx ciw;
5645
5646	  lo = 0;
5647
5648	  for (i = 0; i < cum.num_reg_words && i < 5; i++)
5649	    if (cum.reg_args_type[i])
5650	      lo |= (1 << (7 - i));
5651
5652	  if (cum.num_reg_words == 6 && cum.reg_args_type[5])
5653	    lo |= 7;
5654	  else
5655	    lo |= cum.num_reg_words;
5656
5657#if HOST_BITS_PER_WIDE_INT == 32
5658	  hi = (cum.num_args << 20) | cum.num_arg_words;
5659#else
5660	  lo = lo | ((HOST_WIDE_INT) cum.num_args << 52)
5661	    | ((HOST_WIDE_INT) cum.num_arg_words << 32);
5662	  hi = 0;
5663#endif
5664	  ciw = immed_double_const (lo, hi, DImode);
5665
5666	  return gen_rtx_UNSPEC (DImode, gen_rtvec (1, ciw),
5667				 UNSPEC_UMK_LOAD_CIW);
5668	}
5669
5670      size = ALPHA_ARG_SIZE (mode, type, named);
5671      num_args = cum.num_reg_words;
5672      if (cum.force_stack
5673	  || cum.num_reg_words + size > 6
5674	  || targetm.calls.must_pass_in_stack (mode, type))
5675	return NULL_RTX;
5676      else if (type && TYPE_MODE (type) == BLKmode)
5677	{
5678	  rtx reg1, reg2;
5679
5680	  reg1 = gen_rtx_REG (DImode, num_args + 16);
5681	  reg1 = gen_rtx_EXPR_LIST (DImode, reg1, const0_rtx);
5682
5683	  /* The argument fits in two registers. Note that we still need to
5684	     reserve a register for empty structures.  */
5685	  if (size == 0)
5686	    return NULL_RTX;
5687	  else if (size == 1)
5688	    return gen_rtx_PARALLEL (mode, gen_rtvec (1, reg1));
5689	  else
5690	    {
5691	      reg2 = gen_rtx_REG (DImode, num_args + 17);
5692	      reg2 = gen_rtx_EXPR_LIST (DImode, reg2, GEN_INT (8));
5693	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, reg1, reg2));
5694	    }
5695	}
5696    }
5697#elif TARGET_ABI_OSF
5698    {
5699      if (cum >= 6)
5700	return NULL_RTX;
5701      num_args = cum;
5702
5703      /* VOID is passed as a special flag for "last argument".  */
5704      if (type == void_type_node)
5705	basereg = 16;
5706      else if (targetm.calls.must_pass_in_stack (mode, type))
5707	return NULL_RTX;
5708    }
5709#else
5710#error Unhandled ABI
5711#endif
5712
5713  return gen_rtx_REG (mode, num_args + basereg);
5714}
5715
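/* Return the number of bytes of the argument that are passed in registers
   when the argument straddles the boundary between the six argument
   registers and the stack, or zero if it is passed entirely in one or
   the other.  */
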
5716static int
5717alpha_arg_partial_bytes (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5718			 enum machine_mode mode ATTRIBUTE_UNUSED,
5719			 tree type ATTRIBUTE_UNUSED,
5720			 bool named ATTRIBUTE_UNUSED)
5721{
5722  int words = 0;
5723
5724#if TARGET_ABI_OPEN_VMS
5725  if (cum->num_args < 6
5726      && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5727    words = 6 - cum->num_args;
5728#elif TARGET_ABI_UNICOSMK
5729  /* Never any split arguments.  */
5730#elif TARGET_ABI_OSF
5731  if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5732    words = 6 - *cum;
5733#else
5734#error Unhandled ABI
5735#endif
5736
5737  return words * UNITS_PER_WORD;
5738}
5739
5740
5741/* Return true if TYPE must be returned in memory, instead of in registers.  */
5742
5743static bool
5744alpha_return_in_memory (tree type, tree fndecl ATTRIBUTE_UNUSED)
5745{
5746  enum machine_mode mode = VOIDmode;
5747  int size;
5748
5749  if (type)
5750    {
5751      mode = TYPE_MODE (type);
5752
5753      /* All aggregates are returned in memory.  */
5754      if (AGGREGATE_TYPE_P (type))
5755	return true;
5756    }
5757
5758  size = GET_MODE_SIZE (mode);
5759  switch (GET_MODE_CLASS (mode))
5760    {
5761    case MODE_VECTOR_FLOAT:
5762      /* Pass all float vectors in memory, like an aggregate.  */
5763      return true;
5764
5765    case MODE_COMPLEX_FLOAT:
5766      /* We judge complex floats on the size of their element,
5767	 not the size of the whole type.  */
5768      size = GET_MODE_UNIT_SIZE (mode);
5769      break;
5770
5771    case MODE_INT:
5772    case MODE_FLOAT:
5773    case MODE_COMPLEX_INT:
5774    case MODE_VECTOR_INT:
5775      break;
5776
5777    default:
5778      /* ??? We get called on all sorts of random stuff from
5779	 aggregate_value_p.  We must return something, but it's not
5780	 clear what's safe to return.  Pretend it's a struct I
5781	 guess.  */
5782      return true;
5783    }
5784
5785  /* Otherwise types must fit in one register.  */
5786  return size > UNITS_PER_WORD;
5787}
5788
5789/* Return true if TYPE should be passed by invisible reference.  */
5790
5791static bool
5792alpha_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5793			 enum machine_mode mode,
5794			 tree type ATTRIBUTE_UNUSED,
5795			 bool named ATTRIBUTE_UNUSED)
5796{
5797  return mode == TFmode || mode == TCmode;
5798}
5799
5800/* Define how to find the value returned by a function.  VALTYPE is the
5801   data type of the value (as a tree).  If the precise function being
5802   called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5803   MODE is set instead of VALTYPE for libcalls.
5804
5805   On Alpha the value is found in $0 for integer functions and
5806   $f0 for floating-point functions.  */
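
/* In GCC's hard register numbering $0 is register 0 and $f0 is register 32;
   a complex floating-point value is returned in the pair $f0/$f1.  */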
5807
5808rtx
5809function_value (tree valtype, tree func ATTRIBUTE_UNUSED,
5810		enum machine_mode mode)
5811{
5812  unsigned int regnum, dummy;
5813  enum mode_class class;
5814
5815  gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5816
5817  if (valtype)
5818    mode = TYPE_MODE (valtype);
5819
5820  class = GET_MODE_CLASS (mode);
5821  switch (class)
5822    {
5823    case MODE_INT:
5824      PROMOTE_MODE (mode, dummy, valtype);
5825      /* FALLTHRU */
5826
5827    case MODE_COMPLEX_INT:
5828    case MODE_VECTOR_INT:
5829      regnum = 0;
5830      break;
5831
5832    case MODE_FLOAT:
5833      regnum = 32;
5834      break;
5835
5836    case MODE_COMPLEX_FLOAT:
5837      {
5838	enum machine_mode cmode = GET_MODE_INNER (mode);
5839
5840	return gen_rtx_PARALLEL
5841	  (VOIDmode,
5842	   gen_rtvec (2,
5843		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5844				         const0_rtx),
5845		      gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5846				         GEN_INT (GET_MODE_SIZE (cmode)))));
5847      }
5848
5849    default:
5850      gcc_unreachable ();
5851    }
5852
5853  return gen_rtx_REG (mode, regnum);
5854}
5855
5856/* TCmode complex values are passed by invisible reference.  We
5857   should not split these values.  */
5858
5859static bool
5860alpha_split_complex_arg (tree type)
5861{
5862  return TYPE_MODE (type) != TCmode;
5863}
5864
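/* Build the va_list type used by va_arg on OSF/1: a record containing a
   "__base" pointer into the argument save area and an integer "__offset"
   counting the bytes of arguments already consumed.  VMS and Unicos/Mk
   simply use a pointer.  */
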
5865static tree
5866alpha_build_builtin_va_list (void)
5867{
5868  tree base, ofs, space, record, type_decl;
5869
5870  if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK)
5871    return ptr_type_node;
5872
5873  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5874  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5875  TREE_CHAIN (record) = type_decl;
5876  TYPE_NAME (record) = type_decl;
5877
5878  /* C++? SET_IS_AGGR_TYPE (record, 1); */
5879
5880  /* Dummy field to prevent alignment warnings.  */
5881  space = build_decl (FIELD_DECL, NULL_TREE, integer_type_node);
5882  DECL_FIELD_CONTEXT (space) = record;
5883  DECL_ARTIFICIAL (space) = 1;
5884  DECL_IGNORED_P (space) = 1;
5885
5886  ofs = build_decl (FIELD_DECL, get_identifier ("__offset"),
5887		    integer_type_node);
5888  DECL_FIELD_CONTEXT (ofs) = record;
5889  TREE_CHAIN (ofs) = space;
5890
5891  base = build_decl (FIELD_DECL, get_identifier ("__base"),
5892		     ptr_type_node);
5893  DECL_FIELD_CONTEXT (base) = record;
5894  TREE_CHAIN (base) = ofs;
5895
5896  TYPE_FIELDS (record) = base;
5897  layout_type (record);
5898
5899  va_list_gpr_counter_field = ofs;
5900  return record;
5901}
5902
5903#if TARGET_ABI_OSF
5904/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
5905   and constant additions.  */
5906
5907static tree
5908va_list_skip_additions (tree lhs)
5909{
5910  tree rhs, stmt;
5911
5912  if (TREE_CODE (lhs) != SSA_NAME)
5913    return lhs;
5914
5915  for (;;)
5916    {
5917      stmt = SSA_NAME_DEF_STMT (lhs);
5918
5919      if (TREE_CODE (stmt) == PHI_NODE)
5920	return stmt;
5921
5922      if (TREE_CODE (stmt) != MODIFY_EXPR
5923	  || TREE_OPERAND (stmt, 0) != lhs)
5924	return lhs;
5925
5926      rhs = TREE_OPERAND (stmt, 1);
5927      if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
5928	rhs = TREE_OPERAND (rhs, 0);
5929
5930      if ((TREE_CODE (rhs) != NOP_EXPR
5931	   && TREE_CODE (rhs) != CONVERT_EXPR
5932	   && (TREE_CODE (rhs) != PLUS_EXPR
5933	       || TREE_CODE (TREE_OPERAND (rhs, 1)) != INTEGER_CST
5934	       || !host_integerp (TREE_OPERAND (rhs, 1), 1)))
5935	  || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5936	return rhs;
5937
5938      lhs = TREE_OPERAND (rhs, 0);
5939    }
5940}
5941
5942/* Check if LHS = RHS statement is
5943   LHS = *(ap.__base + ap.__offset + cst)
5944   or
5945   LHS = *(ap.__base
5946	   + ((ap.__offset + cst <= 47)
5947	      ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5948   If the former, indicate that GPR registers are needed,
5949   if the latter, indicate that FPR registers are needed.
5950   On alpha, cfun->va_list_gpr_size is used as size of the needed
5951   regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if
5952   GPR registers are needed and bit 1 set if FPR registers are needed.
5953   Return true if va_list references should not be scanned for the current
5954   statement.  */
5955
5956static bool
5957alpha_stdarg_optimize_hook (struct stdarg_info *si, tree lhs, tree rhs)
5958{
5959  tree base, offset, arg1, arg2;
5960  int offset_arg = 1;
5961
5962  if (TREE_CODE (rhs) != INDIRECT_REF
5963      || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5964    return false;
5965
5966  lhs = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5967  if (lhs == NULL_TREE
5968      || TREE_CODE (lhs) != PLUS_EXPR)
5969    return false;
5970
5971  base = TREE_OPERAND (lhs, 0);
5972  if (TREE_CODE (base) == SSA_NAME)
5973    base = va_list_skip_additions (base);
5974
5975  if (TREE_CODE (base) != COMPONENT_REF
5976      || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5977    {
5978      base = TREE_OPERAND (lhs, 0);
5979      if (TREE_CODE (base) == SSA_NAME)
5980	base = va_list_skip_additions (base);
5981
5982      if (TREE_CODE (base) != COMPONENT_REF
5983	  || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5984	return false;
5985
5986      offset_arg = 0;
5987    }
5988
5989  base = get_base_address (base);
5990  if (TREE_CODE (base) != VAR_DECL
5991      || !bitmap_bit_p (si->va_list_vars, DECL_UID (base)))
5992    return false;
5993
5994  offset = TREE_OPERAND (lhs, offset_arg);
5995  if (TREE_CODE (offset) == SSA_NAME)
5996    offset = va_list_skip_additions (offset);
5997
5998  if (TREE_CODE (offset) == PHI_NODE)
5999    {
6000      HOST_WIDE_INT sub;
6001
6002      if (PHI_NUM_ARGS (offset) != 2)
6003	goto escapes;
6004
6005      arg1 = va_list_skip_additions (PHI_ARG_DEF (offset, 0));
6006      arg2 = va_list_skip_additions (PHI_ARG_DEF (offset, 1));
6007      if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR)
6008	{
6009	  tree tem = arg1;
6010	  arg1 = arg2;
6011	  arg2 = tem;
6012
6013	  if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR)
6014	    goto escapes;
6015	}
6016      if (!host_integerp (TREE_OPERAND (arg2, 1), 0))
6017	goto escapes;
6018
6019      sub = tree_low_cst (TREE_OPERAND (arg2, 1), 0);
6020      if (TREE_CODE (arg2) == MINUS_EXPR)
6021	sub = -sub;
6022      if (sub < -48 || sub > -32)
6023	goto escapes;
6024
6025      arg2 = va_list_skip_additions (TREE_OPERAND (arg2, 0));
6026      if (arg1 != arg2)
6027	goto escapes;
6028
6029      if (TREE_CODE (arg1) == SSA_NAME)
6030	arg1 = va_list_skip_additions (arg1);
6031
6032      if (TREE_CODE (arg1) != COMPONENT_REF
6033	  || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6034	  || get_base_address (arg1) != base)
6035	goto escapes;
6036
6037      /* Need floating point regs.  */
6038      cfun->va_list_fpr_size |= 2;
6039    }
6040  else if (TREE_CODE (offset) != COMPONENT_REF
6041	   || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6042	   || get_base_address (offset) != base)
6043    goto escapes;
6044  else
6045    /* Need general regs.  */
6046    cfun->va_list_fpr_size |= 1;
6047  return false;
6048
6049escapes:
6050  si->va_list_escapes = true;
6051  return false;
6052}
6053#endif
6054
6055/* Perform any actions needed for a function that is receiving a
6056   variable number of arguments.  */
6057
6058static void
6059alpha_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
6060			      tree type, int *pretend_size, int no_rtl)
6061{
6062  CUMULATIVE_ARGS cum = *pcum;
6063
6064  /* Skip the current argument.  */
6065  FUNCTION_ARG_ADVANCE (cum, mode, type, 1);
6066
6067#if TARGET_ABI_UNICOSMK
6068  /* On Unicos/Mk, the standard subroutine __T3E_MISMATCH stores all register
6069     arguments on the stack. Unfortunately, it doesn't always store the first
6070     one (i.e. the one that arrives in $16 or $f16). This is not a problem
6071     with stdargs as we always have at least one named argument there.  */
6072  if (cum.num_reg_words < 6)
6073    {
6074      if (!no_rtl)
6075	{
6076	  emit_insn (gen_umk_mismatch_args (GEN_INT (cum.num_reg_words)));
6077	  emit_insn (gen_arg_home_umk ());
6078	}
6079      *pretend_size = 0;
6080    }
6081#elif TARGET_ABI_OPEN_VMS
6082  /* For VMS, we allocate space for all 6 arg registers plus a count.
6083
6084     However, if NO registers need to be saved, don't allocate any space.
6085     This is not only because we won't need the space, but because AP
6086     includes the current_pretend_args_size and we don't want to mess up
6087     any ap-relative addresses already made.  */
6088  if (cum.num_args < 6)
6089    {
6090      if (!no_rtl)
6091	{
6092	  emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6093	  emit_insn (gen_arg_home ());
6094	}
6095      *pretend_size = 7 * UNITS_PER_WORD;
6096    }
6097#else
6098  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6099     only push those that are remaining.  However, if NO registers need to
6100     be saved, don't allocate any space.  This is not only because we won't
6101     need the space, but because AP includes the current_pretend_args_size
6102     and we don't want to mess up any ap-relative addresses already made.
6103
6104     If we are not to use the floating-point registers, save the integer
6105     registers where we would put the floating-point registers.  This is
6106     not the most efficient way to implement varargs with just one register
6107     class, but it isn't worth doing anything more efficient in this rare
6108     case.  */
6109  if (cum >= 6)
6110    return;
6111
6112  if (!no_rtl)
6113    {
6114      int count, set = get_varargs_alias_set ();
6115      rtx tmp;
6116
6117      count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6118      if (count > 6 - cum)
6119	count = 6 - cum;
6120
6121      /* Detect whether integer registers or floating-point registers
6122	 are needed by the detected va_arg statements.  See above for
6123	 how these values are computed.  Note that the "escape" value
6124	 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6125	 these bits set.  */
6126      gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6127
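      /* GPRs are needed (bit 0): spill the remaining integer argument
         registers, starting at $16 + CUM, into the block that follows
         the FP save area.  */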
6128      if (cfun->va_list_fpr_size & 1)
6129	{
6130	  tmp = gen_rtx_MEM (BLKmode,
6131			     plus_constant (virtual_incoming_args_rtx,
6132					    (cum + 6) * UNITS_PER_WORD));
6133	  MEM_NOTRAP_P (tmp) = 1;
6134	  set_mem_alias_set (tmp, set);
6135	  move_block_from_reg (16 + cum, tmp, count);
6136	}
6137
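      /* FPRs are needed (bit 1): save the corresponding floating-point
         argument registers (or the integer registers again if FP registers
         are not being used) into the 48-byte block below it.  */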
6138      if (cfun->va_list_fpr_size & 2)
6139	{
6140	  tmp = gen_rtx_MEM (BLKmode,
6141			     plus_constant (virtual_incoming_args_rtx,
6142					    cum * UNITS_PER_WORD));
6143	  MEM_NOTRAP_P (tmp) = 1;
6144	  set_mem_alias_set (tmp, set);
6145	  move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6146	}
6147     }
6148  *pretend_size = 12 * UNITS_PER_WORD;
6149#endif
6150}
6151
6152void
6153alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6154{
6155  HOST_WIDE_INT offset;
6156  tree t, offset_field, base_field;
6157
6158  if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6159    return;
6160
6161  if (TARGET_ABI_UNICOSMK)
6162    std_expand_builtin_va_start (valist, nextarg);
6163
6164  /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6165     up by 48, storing fp arg registers in the first 48 bytes, and the
6166     integer arg registers in the next 48 bytes.  This is only done,
6167     however, if any integer registers need to be stored.
6168
6169     If no integer registers need be stored, then we must subtract 48
6170     in order to account for the integer arg registers which are counted
6171     in argsize above, but which are not actually stored on the stack.
6172     Must further be careful here about structures straddling the last
6173     integer argument register; that futzes with pretend_args_size,
6174     which changes the meaning of AP.  */
6175
6176  if (NUM_ARGS < 6)
6177    offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6178  else
6179    offset = -6 * UNITS_PER_WORD + current_function_pretend_args_size;
6180
6181  if (TARGET_ABI_OPEN_VMS)
6182    {
6183      nextarg = plus_constant (nextarg, offset);
6184      nextarg = plus_constant (nextarg, NUM_ARGS * UNITS_PER_WORD);
6185      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist,
6186		 make_tree (ptr_type_node, nextarg));
6187      TREE_SIDE_EFFECTS (t) = 1;
6188
6189      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6190    }
6191  else
6192    {
6193      base_field = TYPE_FIELDS (TREE_TYPE (valist));
6194      offset_field = TREE_CHAIN (base_field);
6195
6196      base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
6197			  valist, base_field, NULL_TREE);
6198      offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
6199			    valist, offset_field, NULL_TREE);
6200
6201      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6202      t = build (PLUS_EXPR, ptr_type_node, t,
6203		 build_int_cst (NULL_TREE, offset));
6204      t = build (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6205      TREE_SIDE_EFFECTS (t) = 1;
6206      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6207
6208      t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6209      t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6210      TREE_SIDE_EFFECTS (t) = 1;
6211      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6212    }
6213}
6214
6215static tree
6216alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, tree *pre_p)
6217{
6218  tree type_size, ptr_type, addend, t, addr, internal_post;
6219
6220  /* If the type could not be passed in registers, skip the block
6221     reserved for the registers.  */
6222  if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6223    {
6224      t = build_int_cst (TREE_TYPE (offset), 6*8);
6225      t = build (MODIFY_EXPR, TREE_TYPE (offset), offset,
6226		 build (MAX_EXPR, TREE_TYPE (offset), offset, t));
6227      gimplify_and_add (t, pre_p);
6228    }
6229
6230  addend = offset;
6231  ptr_type = build_pointer_type (type);
6232
6233  if (TREE_CODE (type) == COMPLEX_TYPE)
6234    {
6235      tree real_part, imag_part, real_temp;
6236
6237      real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6238					   offset, pre_p);
6239
6240      /* Copy the value into a new temporary, lest the formal temporary
6241	 be reused out from under us.  */
6242      real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6243
6244      imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6245					   offset, pre_p);
6246
6247      return build (COMPLEX_EXPR, type, real_temp, imag_part);
6248    }
6249  else if (TREE_CODE (type) == REAL_TYPE)
6250    {
6251      tree fpaddend, cond, fourtyeight;
6252
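      /* While the argument still lies within the 48-byte register save
         block, read it from 48 bytes lower, where the FP argument
         registers were spilled (see alpha_setup_incoming_varargs).  */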
6253      fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6254      fpaddend = fold (build (MINUS_EXPR, TREE_TYPE (addend),
6255			      addend, fourtyeight));
6256      cond = fold (build (LT_EXPR, boolean_type_node, addend, fourtyeight));
6257      addend = fold (build (COND_EXPR, TREE_TYPE (addend), cond,
6258			    fpaddend, addend));
6259    }
6260
6261  /* Build the final address and force that value into a temporary.  */
6262  addr = build (PLUS_EXPR, ptr_type, fold_convert (ptr_type, base),
6263	        fold_convert (ptr_type, addend));
6264  internal_post = NULL;
6265  gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6266  append_to_statement_list (internal_post, pre_p);
6267
6268  /* Update the offset field.  */
6269  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6270  if (type_size == NULL || TREE_OVERFLOW (type_size))
6271    t = size_zero_node;
6272  else
6273    {
6274      t = size_binop (PLUS_EXPR, type_size, size_int (7));
6275      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6276      t = size_binop (MULT_EXPR, t, size_int (8));
6277    }
6278  t = fold_convert (TREE_TYPE (offset), t);
6279  t = build (MODIFY_EXPR, void_type_node, offset,
6280	     build (PLUS_EXPR, TREE_TYPE (offset), offset, t));
6281  gimplify_and_add (t, pre_p);
6282
6283  return build_va_arg_indirect_ref (addr);
6284}
6285
6286static tree
6287alpha_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
6288{
6289  tree offset_field, base_field, offset, base, t, r;
6290  bool indirect;
6291
6292  if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK)
6293    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6294
6295  base_field = TYPE_FIELDS (va_list_type_node);
6296  offset_field = TREE_CHAIN (base_field);
6297  base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
6298		      valist, base_field, NULL_TREE);
6299  offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
6300			valist, offset_field, NULL_TREE);
6301
6302  /* Pull the fields of the structure out into temporaries.  Since we never
6303     modify the base field, we can use a formal temporary.  Sign-extend the
6304     offset field so that it's the proper width for pointer arithmetic.  */
6305  base = get_formal_tmp_var (base_field, pre_p);
6306
6307  t = fold_convert (lang_hooks.types.type_for_size (64, 0), offset_field);
6308  offset = get_initialized_tmp_var (t, pre_p, NULL);
6309
6310  indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6311  if (indirect)
6312    type = build_pointer_type (type);
6313
6314  /* Find the value.  Note that this will be a stable indirection, or
6315     a composite of stable indirections in the case of complex.  */
6316  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6317
6318  /* Stuff the offset temporary back into its field.  */
6319  t = build (MODIFY_EXPR, void_type_node, offset_field,
6320	     fold_convert (TREE_TYPE (offset_field), offset));
6321  gimplify_and_add (t, pre_p);
6322
6323  if (indirect)
6324    r = build_va_arg_indirect_ref (r);
6325
6326  return r;
6327}
6328
6329/* Builtins.  */
6330
6331enum alpha_builtin
6332{
6333  ALPHA_BUILTIN_CMPBGE,
6334  ALPHA_BUILTIN_EXTBL,
6335  ALPHA_BUILTIN_EXTWL,
6336  ALPHA_BUILTIN_EXTLL,
6337  ALPHA_BUILTIN_EXTQL,
6338  ALPHA_BUILTIN_EXTWH,
6339  ALPHA_BUILTIN_EXTLH,
6340  ALPHA_BUILTIN_EXTQH,
6341  ALPHA_BUILTIN_INSBL,
6342  ALPHA_BUILTIN_INSWL,
6343  ALPHA_BUILTIN_INSLL,
6344  ALPHA_BUILTIN_INSQL,
6345  ALPHA_BUILTIN_INSWH,
6346  ALPHA_BUILTIN_INSLH,
6347  ALPHA_BUILTIN_INSQH,
6348  ALPHA_BUILTIN_MSKBL,
6349  ALPHA_BUILTIN_MSKWL,
6350  ALPHA_BUILTIN_MSKLL,
6351  ALPHA_BUILTIN_MSKQL,
6352  ALPHA_BUILTIN_MSKWH,
6353  ALPHA_BUILTIN_MSKLH,
6354  ALPHA_BUILTIN_MSKQH,
6355  ALPHA_BUILTIN_UMULH,
6356  ALPHA_BUILTIN_ZAP,
6357  ALPHA_BUILTIN_ZAPNOT,
6358  ALPHA_BUILTIN_AMASK,
6359  ALPHA_BUILTIN_IMPLVER,
6360  ALPHA_BUILTIN_RPCC,
6361  ALPHA_BUILTIN_THREAD_POINTER,
6362  ALPHA_BUILTIN_SET_THREAD_POINTER,
6363
6364  /* TARGET_MAX */
6365  ALPHA_BUILTIN_MINUB8,
6366  ALPHA_BUILTIN_MINSB8,
6367  ALPHA_BUILTIN_MINUW4,
6368  ALPHA_BUILTIN_MINSW4,
6369  ALPHA_BUILTIN_MAXUB8,
6370  ALPHA_BUILTIN_MAXSB8,
6371  ALPHA_BUILTIN_MAXUW4,
6372  ALPHA_BUILTIN_MAXSW4,
6373  ALPHA_BUILTIN_PERR,
6374  ALPHA_BUILTIN_PKLB,
6375  ALPHA_BUILTIN_PKWB,
6376  ALPHA_BUILTIN_UNPKBL,
6377  ALPHA_BUILTIN_UNPKBW,
6378
6379  /* TARGET_CIX */
6380  ALPHA_BUILTIN_CTTZ,
6381  ALPHA_BUILTIN_CTLZ,
6382  ALPHA_BUILTIN_CTPOP,
6383
6384  ALPHA_BUILTIN_max
6385};
6386
6387static unsigned int const code_for_builtin[ALPHA_BUILTIN_max] = {
6388  CODE_FOR_builtin_cmpbge,
6389  CODE_FOR_builtin_extbl,
6390  CODE_FOR_builtin_extwl,
6391  CODE_FOR_builtin_extll,
6392  CODE_FOR_builtin_extql,
6393  CODE_FOR_builtin_extwh,
6394  CODE_FOR_builtin_extlh,
6395  CODE_FOR_builtin_extqh,
6396  CODE_FOR_builtin_insbl,
6397  CODE_FOR_builtin_inswl,
6398  CODE_FOR_builtin_insll,
6399  CODE_FOR_builtin_insql,
6400  CODE_FOR_builtin_inswh,
6401  CODE_FOR_builtin_inslh,
6402  CODE_FOR_builtin_insqh,
6403  CODE_FOR_builtin_mskbl,
6404  CODE_FOR_builtin_mskwl,
6405  CODE_FOR_builtin_mskll,
6406  CODE_FOR_builtin_mskql,
6407  CODE_FOR_builtin_mskwh,
6408  CODE_FOR_builtin_msklh,
6409  CODE_FOR_builtin_mskqh,
6410  CODE_FOR_umuldi3_highpart,
6411  CODE_FOR_builtin_zap,
6412  CODE_FOR_builtin_zapnot,
6413  CODE_FOR_builtin_amask,
6414  CODE_FOR_builtin_implver,
6415  CODE_FOR_builtin_rpcc,
6416  CODE_FOR_load_tp,
6417  CODE_FOR_set_tp,
6418
6419  /* TARGET_MAX */
6420  CODE_FOR_builtin_minub8,
6421  CODE_FOR_builtin_minsb8,
6422  CODE_FOR_builtin_minuw4,
6423  CODE_FOR_builtin_minsw4,
6424  CODE_FOR_builtin_maxub8,
6425  CODE_FOR_builtin_maxsb8,
6426  CODE_FOR_builtin_maxuw4,
6427  CODE_FOR_builtin_maxsw4,
6428  CODE_FOR_builtin_perr,
6429  CODE_FOR_builtin_pklb,
6430  CODE_FOR_builtin_pkwb,
6431  CODE_FOR_builtin_unpkbl,
6432  CODE_FOR_builtin_unpkbw,
6433
6434  /* TARGET_CIX */
6435  CODE_FOR_ctzdi2,
6436  CODE_FOR_clzdi2,
6437  CODE_FOR_popcountdi2
6438};
6439
6440struct alpha_builtin_def
6441{
6442  const char *name;
6443  enum alpha_builtin code;
6444  unsigned int target_mask;
6445  bool is_const;
6446};
6447
6448static struct alpha_builtin_def const zero_arg_builtins[] = {
6449  { "__builtin_alpha_implver",	ALPHA_BUILTIN_IMPLVER,	0, true },
6450  { "__builtin_alpha_rpcc",	ALPHA_BUILTIN_RPCC,	0, false }
6451};
6452
6453static struct alpha_builtin_def const one_arg_builtins[] = {
6454  { "__builtin_alpha_amask",	ALPHA_BUILTIN_AMASK,	0, true },
6455  { "__builtin_alpha_pklb",	ALPHA_BUILTIN_PKLB,	MASK_MAX, true },
6456  { "__builtin_alpha_pkwb",	ALPHA_BUILTIN_PKWB,	MASK_MAX, true },
6457  { "__builtin_alpha_unpkbl",	ALPHA_BUILTIN_UNPKBL,	MASK_MAX, true },
6458  { "__builtin_alpha_unpkbw",	ALPHA_BUILTIN_UNPKBW,	MASK_MAX, true },
6459  { "__builtin_alpha_cttz",	ALPHA_BUILTIN_CTTZ,	MASK_CIX, true },
6460  { "__builtin_alpha_ctlz",	ALPHA_BUILTIN_CTLZ,	MASK_CIX, true },
6461  { "__builtin_alpha_ctpop",	ALPHA_BUILTIN_CTPOP,	MASK_CIX, true }
6462};
6463
6464static struct alpha_builtin_def const two_arg_builtins[] = {
6465  { "__builtin_alpha_cmpbge",	ALPHA_BUILTIN_CMPBGE,	0, true },
6466  { "__builtin_alpha_extbl",	ALPHA_BUILTIN_EXTBL,	0, true },
6467  { "__builtin_alpha_extwl",	ALPHA_BUILTIN_EXTWL,	0, true },
6468  { "__builtin_alpha_extll",	ALPHA_BUILTIN_EXTLL,	0, true },
6469  { "__builtin_alpha_extql",	ALPHA_BUILTIN_EXTQL,	0, true },
6470  { "__builtin_alpha_extwh",	ALPHA_BUILTIN_EXTWH,	0, true },
6471  { "__builtin_alpha_extlh",	ALPHA_BUILTIN_EXTLH,	0, true },
6472  { "__builtin_alpha_extqh",	ALPHA_BUILTIN_EXTQH,	0, true },
6473  { "__builtin_alpha_insbl",	ALPHA_BUILTIN_INSBL,	0, true },
6474  { "__builtin_alpha_inswl",	ALPHA_BUILTIN_INSWL,	0, true },
6475  { "__builtin_alpha_insll",	ALPHA_BUILTIN_INSLL,	0, true },
6476  { "__builtin_alpha_insql",	ALPHA_BUILTIN_INSQL,	0, true },
6477  { "__builtin_alpha_inswh",	ALPHA_BUILTIN_INSWH,	0, true },
6478  { "__builtin_alpha_inslh",	ALPHA_BUILTIN_INSLH,	0, true },
6479  { "__builtin_alpha_insqh",	ALPHA_BUILTIN_INSQH,	0, true },
6480  { "__builtin_alpha_mskbl",	ALPHA_BUILTIN_MSKBL,	0, true },
6481  { "__builtin_alpha_mskwl",	ALPHA_BUILTIN_MSKWL,	0, true },
6482  { "__builtin_alpha_mskll",	ALPHA_BUILTIN_MSKLL,	0, true },
6483  { "__builtin_alpha_mskql",	ALPHA_BUILTIN_MSKQL,	0, true },
6484  { "__builtin_alpha_mskwh",	ALPHA_BUILTIN_MSKWH,	0, true },
6485  { "__builtin_alpha_msklh",	ALPHA_BUILTIN_MSKLH,	0, true },
6486  { "__builtin_alpha_mskqh",	ALPHA_BUILTIN_MSKQH,	0, true },
6487  { "__builtin_alpha_umulh",	ALPHA_BUILTIN_UMULH,	0, true },
6488  { "__builtin_alpha_zap",	ALPHA_BUILTIN_ZAP,	0, true },
6489  { "__builtin_alpha_zapnot",	ALPHA_BUILTIN_ZAPNOT,	0, true },
6490  { "__builtin_alpha_minub8",	ALPHA_BUILTIN_MINUB8,	MASK_MAX, true },
6491  { "__builtin_alpha_minsb8",	ALPHA_BUILTIN_MINSB8,	MASK_MAX, true },
6492  { "__builtin_alpha_minuw4",	ALPHA_BUILTIN_MINUW4,	MASK_MAX, true },
6493  { "__builtin_alpha_minsw4",	ALPHA_BUILTIN_MINSW4,	MASK_MAX, true },
6494  { "__builtin_alpha_maxub8",	ALPHA_BUILTIN_MAXUB8,	MASK_MAX, true },
6495  { "__builtin_alpha_maxsb8",	ALPHA_BUILTIN_MAXSB8,	MASK_MAX, true },
6496  { "__builtin_alpha_maxuw4",	ALPHA_BUILTIN_MAXUW4,	MASK_MAX, true },
6497  { "__builtin_alpha_maxsw4",	ALPHA_BUILTIN_MAXSW4,	MASK_MAX, true },
6498  { "__builtin_alpha_perr",	ALPHA_BUILTIN_PERR,	MASK_MAX, true }
6499};
6500
6501static GTY(()) tree alpha_v8qi_u;
6502static GTY(()) tree alpha_v8qi_s;
6503static GTY(()) tree alpha_v4hi_u;
6504static GTY(()) tree alpha_v4hi_s;
6505
6506static void
6507alpha_init_builtins (void)
6508{
6509  const struct alpha_builtin_def *p;
6510  tree dimode_integer_type_node;
6511  tree ftype, attrs[2];
6512  size_t i;
6513
6514  dimode_integer_type_node = lang_hooks.types.type_for_mode (DImode, 0);
6515
6516  attrs[0] = tree_cons (get_identifier ("nothrow"), NULL, NULL);
6517  attrs[1] = tree_cons (get_identifier ("const"), NULL, attrs[0]);
6518
6519  ftype = build_function_type (dimode_integer_type_node, void_list_node);
6520
6521  p = zero_arg_builtins;
6522  for (i = 0; i < ARRAY_SIZE (zero_arg_builtins); ++i, ++p)
6523    if ((target_flags & p->target_mask) == p->target_mask)
6524      lang_hooks.builtin_function (p->name, ftype, p->code, BUILT_IN_MD,
6525				   NULL, attrs[p->is_const]);
6526
6527  ftype = build_function_type_list (dimode_integer_type_node,
6528				    dimode_integer_type_node, NULL_TREE);
6529
6530  p = one_arg_builtins;
6531  for (i = 0; i < ARRAY_SIZE (one_arg_builtins); ++i, ++p)
6532    if ((target_flags & p->target_mask) == p->target_mask)
6533      lang_hooks.builtin_function (p->name, ftype, p->code, BUILT_IN_MD,
6534				   NULL, attrs[p->is_const]);
6535
6536  ftype = build_function_type_list (dimode_integer_type_node,
6537				    dimode_integer_type_node,
6538				    dimode_integer_type_node, NULL_TREE);
6539
6540  p = two_arg_builtins;
6541  for (i = 0; i < ARRAY_SIZE (two_arg_builtins); ++i, ++p)
6542    if ((target_flags & p->target_mask) == p->target_mask)
6543      lang_hooks.builtin_function (p->name, ftype, p->code, BUILT_IN_MD,
6544				   NULL, attrs[p->is_const]);
6545
6546  ftype = build_function_type (ptr_type_node, void_list_node);
6547  lang_hooks.builtin_function ("__builtin_thread_pointer", ftype,
6548			       ALPHA_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
6549			       NULL, attrs[0]);
6550
6551  ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
6552  lang_hooks.builtin_function ("__builtin_set_thread_pointer", ftype,
6553			       ALPHA_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD,
6554			       NULL, attrs[0]);
6555
6556  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6557  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6558  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6559  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6560}
6561
6562/* Expand an expression EXP that calls a built-in function,
6563   with result going to TARGET if that's convenient
6564   (and in mode MODE if that's convenient).
6565   SUBTARGET may be used as the target for computing one of EXP's operands.
6566   IGNORE is nonzero if the value is to be ignored.  */
6567
6568static rtx
6569alpha_expand_builtin (tree exp, rtx target,
6570		      rtx subtarget ATTRIBUTE_UNUSED,
6571		      enum machine_mode mode ATTRIBUTE_UNUSED,
6572		      int ignore ATTRIBUTE_UNUSED)
6573{
6574#define MAX_ARGS 2
6575
6576  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
6577  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6578  tree arglist = TREE_OPERAND (exp, 1);
6579  enum insn_code icode;
6580  rtx op[MAX_ARGS], pat;
6581  int arity;
6582  bool nonvoid;
6583
6584  if (fcode >= ALPHA_BUILTIN_max)
6585    internal_error ("bad builtin fcode");
6586  icode = code_for_builtin[fcode];
6587  if (icode == 0)
6588    internal_error ("bad builtin fcode");
6589
6590  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6591
6592  for (arglist = TREE_OPERAND (exp, 1), arity = 0;
6593       arglist;
6594       arglist = TREE_CHAIN (arglist), arity++)
6595    {
6596      const struct insn_operand_data *insn_op;
6597
6598      tree arg = TREE_VALUE (arglist);
6599      if (arg == error_mark_node)
6600	return NULL_RTX;
6601      if (arity >= MAX_ARGS)
6602	return NULL_RTX;
6603
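      /* Operand 0 of the insn is the result when the builtin returns a
         value, so argument number ARITY maps to insn operand
         ARITY + NONVOID.  */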
6604      insn_op = &insn_data[icode].operand[arity + nonvoid];
6605
6606      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, 0);
6607
6608      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6609	op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6610    }
6611
6612  if (nonvoid)
6613    {
6614      enum machine_mode tmode = insn_data[icode].operand[0].mode;
6615      if (!target
6616	  || GET_MODE (target) != tmode
6617	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6618	target = gen_reg_rtx (tmode);
6619    }
6620
6621  switch (arity)
6622    {
6623    case 0:
6624      pat = GEN_FCN (icode) (target);
6625      break;
6626    case 1:
6627      if (nonvoid)
6628        pat = GEN_FCN (icode) (target, op[0]);
6629      else
6630	pat = GEN_FCN (icode) (op[0]);
6631      break;
6632    case 2:
6633      pat = GEN_FCN (icode) (target, op[0], op[1]);
6634      break;
6635    default:
6636      gcc_unreachable ();
6637    }
6638  if (!pat)
6639    return NULL_RTX;
6640  emit_insn (pat);
6641
6642  if (nonvoid)
6643    return target;
6644  else
6645    return const0_rtx;
6646}
6647
6648
6649/* Several bits below assume HWI >= 64 bits.  This should be enforced
6650   by config.gcc.  */
6651#if HOST_BITS_PER_WIDE_INT < 64
6652# error "HOST_WIDE_INT too small"
6653#endif
6654
6655/* Fold the builtin for the CMPBGE instruction.  This is a vector comparison
6656   with an 8 bit output vector.  OPINT contains the integer operands; bit N
6657   of OP_CONST is set if OPINT[N] is valid.  */
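
/* Bit I of the result is set when byte I (counting from the least
   significant end) of the first operand is unsigned greater than or
   equal to byte I of the second operand.  */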
6658
6659static tree
6660alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6661{
6662  if (op_const == 3)
6663    {
6664      int i, val;
6665      for (i = 0, val = 0; i < 8; ++i)
6666	{
6667	  unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6668	  unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6669	  if (c0 >= c1)
6670	    val |= 1 << i;
6671	}
6672      return build_int_cst (long_integer_type_node, val);
6673    }
6674  else if (op_const == 2 && opint[1] == 0)
6675    return build_int_cst (long_integer_type_node, 0xff);
6676  return NULL;
6677}
6678
6679/* Fold the builtin for the ZAPNOT instruction.  This is essentially a
6680   specialized form of an AND operation.  Other byte manipulation instructions
6681   are defined in terms of this instruction, so this is also used as a
6682   subroutine for other builtins.
6683
6684   OP contains the tree operands; OPINT contains the extracted integer values.
6685   Bit N of OP_CONST is set if OPINT[N] is valid.  OP may be null if only
6686   OPINT may be considered.  */
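
/* For example, zapnot (X, 0x0f) folds to X & 0x00000000ffffffff, since
   each set bit in the second operand selects one byte of the first to
   keep.  */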
6687
6688static tree
6689alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6690			   long op_const)
6691{
6692  if (op_const & 2)
6693    {
6694      unsigned HOST_WIDE_INT mask = 0;
6695      int i;
6696
6697      for (i = 0; i < 8; ++i)
6698	if ((opint[1] >> i) & 1)
6699	  mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6700
6701      if (op_const & 1)
6702	return build_int_cst (long_integer_type_node, opint[0] & mask);
6703
6704      if (op)
6705	return fold (build2 (BIT_AND_EXPR, long_integer_type_node, op[0],
6706			     build_int_cst (long_integer_type_node, mask)));
6707    }
6708  else if ((op_const & 1) && opint[0] == 0)
6709    return build_int_cst (long_integer_type_node, 0);
6710  return NULL;
6711}
6712
6713/* Fold the builtins for the EXT family of instructions.  */
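
/* These extract the bytes selected by BYTEMASK from the first operand
   after shifting it right (left for the *H forms) by the byte offset in
   the low three bits of the second operand; e.g. extwl (X, 2) is
   (X >> 16) & 0xffff.  */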
6714
6715static tree
6716alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6717			  long op_const, unsigned HOST_WIDE_INT bytemask,
6718			  bool is_high)
6719{
6720  long zap_const = 2;
6721  tree *zap_op = NULL;
6722
6723  if (op_const & 2)
6724    {
6725      unsigned HOST_WIDE_INT loc;
6726
6727      loc = opint[1] & 7;
6728      if (BYTES_BIG_ENDIAN)
6729        loc ^= 7;
6730      loc *= 8;
6731
6732      if (loc != 0)
6733	{
6734	  if (op_const & 1)
6735	    {
6736	      unsigned HOST_WIDE_INT temp = opint[0];
6737	      if (is_high)
6738		temp <<= loc;
6739	      else
6740		temp >>= loc;
6741	      opint[0] = temp;
6742	      zap_const = 3;
6743	    }
6744	}
6745      else
6746	zap_op = op;
6747    }
6748
6749  opint[1] = bytemask;
6750  return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6751}
6752
6753/* Fold the builtins for the INS family of instructions.  */
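
/* These position the low bytes of the first operand at the byte offset
   given by the second operand; e.g. insbl (X, 3) is (X & 0xff) << 24.  */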
6754
6755static tree
6756alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6757			  long op_const, unsigned HOST_WIDE_INT bytemask,
6758			  bool is_high)
6759{
6760  if ((op_const & 1) && opint[0] == 0)
6761    return build_int_cst (long_integer_type_node, 0);
6762
6763  if (op_const & 2)
6764    {
6765      unsigned HOST_WIDE_INT temp, loc, byteloc;
6766      tree *zap_op = NULL;
6767
6768      loc = opint[1] & 7;
6769      if (BYTES_BIG_ENDIAN)
6770        loc ^= 7;
6771      bytemask <<= loc;
6772
6773      temp = opint[0];
6774      if (is_high)
6775	{
6776	  byteloc = (64 - (loc * 8)) & 0x3f;
6777	  if (byteloc == 0)
6778	    zap_op = op;
6779	  else
6780	    temp >>= byteloc;
6781	  bytemask >>= 8;
6782	}
6783      else
6784	{
6785	  byteloc = loc * 8;
6786	  if (byteloc == 0)
6787	    zap_op = op;
6788	  else
6789	    temp <<= byteloc;
6790	}
6791
6792      opint[0] = temp;
6793      opint[1] = bytemask;
6794      return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6795    }
6796
6797  return NULL;
6798}
6799
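/* Fold the builtins for the MSK family of instructions, which clear the
   bytes selected by BYTEMASK at the byte offset given by the second
   operand.  */
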
6800static tree
6801alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6802			  long op_const, unsigned HOST_WIDE_INT bytemask,
6803			  bool is_high)
6804{
6805  if (op_const & 2)
6806    {
6807      unsigned HOST_WIDE_INT loc;
6808
6809      loc = opint[1] & 7;
6810      if (BYTES_BIG_ENDIAN)
6811        loc ^= 7;
6812      bytemask <<= loc;
6813
6814      if (is_high)
6815	bytemask >>= 8;
6816
6817      opint[1] = bytemask ^ 0xff;
6818    }
6819
6820  return alpha_fold_builtin_zapnot (op, opint, op_const);
6821}
6822
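/* Fold the builtin for the UMULH instruction, which yields the high 64
   bits of the 128-bit unsigned product of its operands.  */
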
6823static tree
6824alpha_fold_builtin_umulh (unsigned HOST_WIDE_INT opint[], long op_const)
6825{
6826  switch (op_const)
6827    {
6828    case 3:
6829      {
6830	unsigned HOST_WIDE_INT l;
6831	HOST_WIDE_INT h;
6832
6833	mul_double (opint[0], 0, opint[1], 0, &l, &h);
6834
6835#if HOST_BITS_PER_WIDE_INT > 64
6836# error fixme
6837#endif
6838
6839	return build_int_cst (long_integer_type_node, h);
6840      }
6841
6842    case 1:
6843      opint[1] = opint[0];
6844      /* FALLTHRU */
6845    case 2:
6846      /* Note that (X*1) >> 64 == 0.  */
6847      if (opint[1] == 0 || opint[1] == 1)
6848	return build_int_cst (long_integer_type_node, 0);
6849      break;
6850    }
6851  return NULL;
6852}
6853
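/* Fold one of the MIN/MAX vector builtins by applying CODE elementwise
   to the operands viewed as vectors of type VTYPE.  */
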
6854static tree
6855alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6856{
6857  tree op0 = fold_convert (vtype, op[0]);
6858  tree op1 = fold_convert (vtype, op[1]);
6859  tree val = fold (build2 (code, vtype, op0, op1));
6860  return fold_convert (long_integer_type_node, val);
6861}
6862
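/* Fold the builtin for the PERR instruction: the sum of the absolute
   differences of the eight unsigned byte pairs.  */
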
6863static tree
6864alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6865{
6866  unsigned HOST_WIDE_INT temp = 0;
6867  int i;
6868
6869  if (op_const != 3)
6870    return NULL;
6871
6872  for (i = 0; i < 8; ++i)
6873    {
6874      unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6875      unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6876      if (a >= b)
6877	temp += a - b;
6878      else
6879	temp += b - a;
6880    }
6881
6882  return build_int_cst (long_integer_type_node, temp);
6883}
6884
6885static tree
6886alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6887{
6888  unsigned HOST_WIDE_INT temp;
6889
6890  if (op_const == 0)
6891    return NULL;
6892
6893  temp = opint[0] & 0xff;
6894  temp |= (opint[0] >> 24) & 0xff00;
6895
6896  return build_int_cst (long_integer_type_node, temp);
6897}
6898
6899static tree
6900alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6901{
6902  unsigned HOST_WIDE_INT temp;
6903
6904  if (op_const == 0)
6905    return NULL;
6906
6907  temp = opint[0] & 0xff;
6908  temp |= (opint[0] >>  8) & 0xff00;
6909  temp |= (opint[0] >> 16) & 0xff0000;
6910  temp |= (opint[0] >> 24) & 0xff000000;
6911
6912  return build_int_cst (long_integer_type_node, temp);
6913}
6914
6915static tree
6916alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6917{
6918  unsigned HOST_WIDE_INT temp;
6919
6920  if (op_const == 0)
6921    return NULL;
6922
6923  temp = opint[0] & 0xff;
6924  temp |= (opint[0] & 0xff00) << 24;
6925
6926  return build_int_cst (long_integer_type_node, temp);
6927}
6928
6929static tree
6930alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6931{
6932  unsigned HOST_WIDE_INT temp;
6933
6934  if (op_const == 0)
6935    return NULL;
6936
6937  temp = opint[0] & 0xff;
6938  temp |= (opint[0] & 0x0000ff00) << 8;
6939  temp |= (opint[0] & 0x00ff0000) << 16;
6940  temp |= (opint[0] & 0xff000000) << 24;
6941
6942  return build_int_cst (long_integer_type_node, temp);
6943}
6944
6945static tree
6946alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6947{
6948  unsigned HOST_WIDE_INT temp;
6949
6950  if (op_const == 0)
6951    return NULL;
6952
6953  if (opint[0] == 0)
6954    temp = 64;
6955  else
6956    temp = exact_log2 (opint[0] & -opint[0]);
6957
6958  return build_int_cst (long_integer_type_node, temp);
6959}
6960
6961static tree
6962alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6963{
6964  unsigned HOST_WIDE_INT temp;
6965
6966  if (op_const == 0)
6967    return NULL;
6968
6969  if (opint[0] == 0)
6970    temp = 64;
6971  else
6972    temp = 64 - floor_log2 (opint[0]) - 1;
6973
6974  return build_int_cst (long_integer_type_node, temp);
6975}
6976
6977static tree
6978alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
6979{
6980  unsigned HOST_WIDE_INT temp, op;
6981
6982  if (op_const == 0)
6983    return NULL;
6984
6985  op = opint[0];
6986  temp = 0;
6987  while (op)
6988    temp++, op &= op - 1;
6989
6990  return build_int_cst (long_integer_type_node, temp);
6991}
6992
6993/* Fold one of our builtin functions.  */
6994
6995static tree
6996alpha_fold_builtin (tree fndecl, tree arglist, bool ignore ATTRIBUTE_UNUSED)
6997{
6998  tree op[MAX_ARGS], t;
6999  unsigned HOST_WIDE_INT opint[MAX_ARGS];
7000  long op_const = 0, arity = 0;
7001
7002  for (t = arglist; t ; t = TREE_CHAIN (t), ++arity)
7003    {
7004      tree arg = TREE_VALUE (t);
7005      if (arg == error_mark_node)
7006	return NULL;
7007      if (arity >= MAX_ARGS)
7008	return NULL;
7009
7010      op[arity] = arg;
7011      opint[arity] = 0;
7012      if (TREE_CODE (arg) == INTEGER_CST)
7013	{
7014          op_const |= 1L << arity;
7015	  opint[arity] = int_cst_value (arg);
7016	}
7017    }
7018
7019  switch (DECL_FUNCTION_CODE (fndecl))
7020    {
7021    case ALPHA_BUILTIN_CMPBGE:
7022      return alpha_fold_builtin_cmpbge (opint, op_const);
7023
7024    case ALPHA_BUILTIN_EXTBL:
7025      return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7026    case ALPHA_BUILTIN_EXTWL:
7027      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7028    case ALPHA_BUILTIN_EXTLL:
7029      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7030    case ALPHA_BUILTIN_EXTQL:
7031      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7032    case ALPHA_BUILTIN_EXTWH:
7033      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7034    case ALPHA_BUILTIN_EXTLH:
7035      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7036    case ALPHA_BUILTIN_EXTQH:
7037      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7038
7039    case ALPHA_BUILTIN_INSBL:
7040      return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7041    case ALPHA_BUILTIN_INSWL:
7042      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7043    case ALPHA_BUILTIN_INSLL:
7044      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7045    case ALPHA_BUILTIN_INSQL:
7046      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7047    case ALPHA_BUILTIN_INSWH:
7048      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7049    case ALPHA_BUILTIN_INSLH:
7050      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7051    case ALPHA_BUILTIN_INSQH:
7052      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7053
7054    case ALPHA_BUILTIN_MSKBL:
7055      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7056    case ALPHA_BUILTIN_MSKWL:
7057      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7058    case ALPHA_BUILTIN_MSKLL:
7059      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7060    case ALPHA_BUILTIN_MSKQL:
7061      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7062    case ALPHA_BUILTIN_MSKWH:
7063      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7064    case ALPHA_BUILTIN_MSKLH:
7065      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7066    case ALPHA_BUILTIN_MSKQH:
7067      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7068
7069    case ALPHA_BUILTIN_UMULH:
7070      return alpha_fold_builtin_umulh (opint, op_const);
7071
7072    case ALPHA_BUILTIN_ZAP:
7073      opint[1] ^= 0xff;
7074      /* FALLTHRU */
7075    case ALPHA_BUILTIN_ZAPNOT:
7076      return alpha_fold_builtin_zapnot (op, opint, op_const);
7077
7078    case ALPHA_BUILTIN_MINUB8:
7079      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7080    case ALPHA_BUILTIN_MINSB8:
7081      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7082    case ALPHA_BUILTIN_MINUW4:
7083      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7084    case ALPHA_BUILTIN_MINSW4:
7085      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7086    case ALPHA_BUILTIN_MAXUB8:
7087      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7088    case ALPHA_BUILTIN_MAXSB8:
7089      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7090    case ALPHA_BUILTIN_MAXUW4:
7091      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7092    case ALPHA_BUILTIN_MAXSW4:
7093      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7094
7095    case ALPHA_BUILTIN_PERR:
7096      return alpha_fold_builtin_perr (opint, op_const);
7097    case ALPHA_BUILTIN_PKLB:
7098      return alpha_fold_builtin_pklb (opint, op_const);
7099    case ALPHA_BUILTIN_PKWB:
7100      return alpha_fold_builtin_pkwb (opint, op_const);
7101    case ALPHA_BUILTIN_UNPKBL:
7102      return alpha_fold_builtin_unpkbl (opint, op_const);
7103    case ALPHA_BUILTIN_UNPKBW:
7104      return alpha_fold_builtin_unpkbw (opint, op_const);
7105
7106    case ALPHA_BUILTIN_CTTZ:
7107      return alpha_fold_builtin_cttz (opint, op_const);
7108    case ALPHA_BUILTIN_CTLZ:
7109      return alpha_fold_builtin_ctlz (opint, op_const);
7110    case ALPHA_BUILTIN_CTPOP:
7111      return alpha_fold_builtin_ctpop (opint, op_const);
7112
7113    case ALPHA_BUILTIN_AMASK:
7114    case ALPHA_BUILTIN_IMPLVER:
7115    case ALPHA_BUILTIN_RPCC:
7116    case ALPHA_BUILTIN_THREAD_POINTER:
7117    case ALPHA_BUILTIN_SET_THREAD_POINTER:
7118      /* None of these are foldable at compile-time.  */
7119    default:
7120      return NULL;
7121    }
7122}
7123
7124/* This page contains routines that are used to determine what the function
7125   prologue and epilogue code will do and write them out.  */
7126
7127/* Compute the size of the save area in the stack.  */
7128
7129/* These variables are used for communication between the following functions.
7130   They indicate various things about the current function being compiled
7131   that are used to tell what kind of prologue, epilogue and procedure
7132   descriptor to generate.  */
7133
7134/* Nonzero if we need a stack procedure.  */
7135enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7136static enum alpha_procedure_types alpha_procedure_type;
7137
7138/* Register number (either FP or SP) that is used to unwind the frame.  */
7139static int vms_unwind_regno;
7140
7141/* Register number used to save FP.  We need not have one for RA since
7142   we don't modify it for register procedures.  This is only defined
7143   for register frame procedures.  */
7144static int vms_save_fp_regno;
7145
7146/* Register number used to reference objects off our PV.  */
7147static int vms_base_regno;
7148
7149/* Compute register masks for saved registers.  */
7150
7151static void
7152alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7153{
7154  unsigned long imask = 0;
7155  unsigned long fmask = 0;
7156  unsigned int i;
7157
7158  /* When outputting a thunk, we don't have valid register life info,
7159     but assemble_start_function wants to output .frame and .mask
7160     directives.  */
7161  if (current_function_is_thunk)
7162    {
7163      *imaskP = 0;
7164      *fmaskP = 0;
7165      return;
7166    }
7167
7168  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7169    imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7170
7171  /* One for every register we have to save.  */
7172  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7173    if (! fixed_regs[i] && ! call_used_regs[i]
7174	&& regs_ever_live[i] && i != REG_RA
7175	&& (!TARGET_ABI_UNICOSMK || i != HARD_FRAME_POINTER_REGNUM))
7176      {
7177	if (i < 32)
7178	  imask |= (1UL << i);
7179	else
7180	  fmask |= (1UL << (i - 32));
7181      }
7182
7183  /* We need to restore these for the handler.  */
7184  if (current_function_calls_eh_return)
7185    {
7186      for (i = 0; ; ++i)
7187	{
7188	  unsigned regno = EH_RETURN_DATA_REGNO (i);
7189	  if (regno == INVALID_REGNUM)
7190	    break;
7191	  imask |= 1UL << regno;
7192	}
7193    }
7194
7195  /* If any register spilled, then spill the return address also.  */
7196  /* ??? This is required by the Digital stack unwind specification
7197     and isn't needed if we're doing Dwarf2 unwinding.  */
7198  if (imask || fmask || alpha_ra_ever_killed ())
7199    imask |= (1UL << REG_RA);
7200
7201  *imaskP = imask;
7202  *fmaskP = fmask;
7203}
7204
7205int
7206alpha_sa_size (void)
7207{
7208  unsigned long mask[2];
7209  int sa_size = 0;
7210  int i, j;
7211
7212  alpha_sa_mask (&mask[0], &mask[1]);
7213
7214  if (TARGET_ABI_UNICOSMK)
7215    {
7216      if (mask[0] || mask[1])
7217	sa_size = 14;
7218    }
7219  else
7220    {
7221      for (j = 0; j < 2; ++j)
7222	for (i = 0; i < 32; ++i)
7223	  if ((mask[j] >> i) & 1)
7224	    sa_size++;
7225    }
7226
7227  if (TARGET_ABI_UNICOSMK)
7228    {
7229      /* We might not need to generate a frame if we don't make any calls
7230	 (including calls to __T3E_MISMATCH if this is a vararg function),
7231	 don't have any local variables which require stack slots, don't
7232	 use alloca and have not determined that we need a frame for other
7233	 reasons.  */
7234
7235      alpha_procedure_type
7236	= (sa_size || get_frame_size() != 0
7237	   || current_function_outgoing_args_size
7238	   || current_function_stdarg || current_function_calls_alloca
7239	   || frame_pointer_needed)
7240	  ? PT_STACK : PT_REGISTER;
7241
7242      /* Always reserve space for saving callee-saved registers if we
7243	 need a frame as required by the calling convention.  */
7244      if (alpha_procedure_type == PT_STACK)
7245        sa_size = 14;
7246    }
7247  else if (TARGET_ABI_OPEN_VMS)
7248    {
7249      /* Start by assuming we can use a register procedure if we don't
7250	 make any calls (REG_RA not used) or need to save any
7251	 registers, and a stack procedure if we do.  */
7252      if ((mask[0] >> REG_RA) & 1)
7253	alpha_procedure_type = PT_STACK;
7254      else if (get_frame_size() != 0)
7255	alpha_procedure_type = PT_REGISTER;
7256      else
7257	alpha_procedure_type = PT_NULL;
7258
7259      /* Don't reserve space for saving FP & RA yet.  Do that later after we've
7260	 made the final decision on stack procedure vs register procedure.  */
7261      if (alpha_procedure_type == PT_STACK)
7262	sa_size -= 2;
7263
7264      /* Decide whether to refer to objects off our PV via FP or PV.
7265	 If we need FP for something else or if we receive a nonlocal
7266	 goto (which expects PV to contain the value), we must use PV.
7267	 Otherwise, start by assuming we can use FP.  */
7268
7269      vms_base_regno
7270	= (frame_pointer_needed
7271	   || current_function_has_nonlocal_label
7272	   || alpha_procedure_type == PT_STACK
7273	   || current_function_outgoing_args_size)
7274	  ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7275
7276      /* If we want to copy PV into FP, we need to find some register
7277	 in which to save FP.  */
7278
7279      vms_save_fp_regno = -1;
7280      if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7281	for (i = 0; i < 32; i++)
7282	  if (! fixed_regs[i] && call_used_regs[i] && ! regs_ever_live[i])
7283	    vms_save_fp_regno = i;
7284
7285      if (vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7286	vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7287      else if (alpha_procedure_type == PT_NULL)
7288	vms_base_regno = REG_PV;
7289
7290      /* Stack unwinding should be done via FP unless we use it for PV.  */
7291      vms_unwind_regno = (vms_base_regno == REG_PV
7292			  ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7293
7294      /* If this is a stack procedure, allow space for saving FP and RA.  */
7295      if (alpha_procedure_type == PT_STACK)
7296	sa_size += 2;
7297    }
7298  else
7299    {
7300      /* Our slot count must be even so the save area is a multiple of 16 bytes.  */
7301      if (sa_size & 1)
7302	sa_size++;
7303    }
7304
7305  return sa_size * 8;
7306}
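
/* A quick sketch of the OSF/Unix case above: if alpha_sa_mask selects
   $9, $10 and $26, the loops count 3 slots, the final else-branch bumps
   that to 4 so the save area stays a multiple of 16 bytes, and the
   function returns 4 * 8 = 32 bytes.  */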
7307
7308/* Define the offset between two registers, one to be eliminated,
7309   and the other its replacement, at the start of a routine.  */
7310
7311HOST_WIDE_INT
7312alpha_initial_elimination_offset (unsigned int from,
7313				  unsigned int to ATTRIBUTE_UNUSED)
7314{
7315  HOST_WIDE_INT ret;
7316
7317  ret = alpha_sa_size ();
7318  ret += ALPHA_ROUND (current_function_outgoing_args_size);
7319
7320  switch (from)
7321    {
7322    case FRAME_POINTER_REGNUM:
7323      break;
7324
7325    case ARG_POINTER_REGNUM:
7326      ret += (ALPHA_ROUND (get_frame_size ()
7327			   + current_function_pretend_args_size)
7328	      - current_function_pretend_args_size);
7329      break;
7330
7331    default:
7332      gcc_unreachable ();
7333    }
7334
7335  return ret;
7336}
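
/* Worked example with hypothetical sizes: given a 32-byte save area,
   48 bytes of outgoing args, a 40-byte frame and no pretend args,
   eliminating ARG_POINTER_REGNUM yields 32 + ALPHA_ROUND (48)
   + ALPHA_ROUND (40 + 0) - 0 = 32 + 48 + 48 = 128, while eliminating
   FRAME_POINTER_REGNUM yields just 32 + 48 = 80 (assuming ALPHA_ROUND
   rounds up to a multiple of 16).  */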
7337
7338int
7339alpha_pv_save_size (void)
7340{
7341  alpha_sa_size ();
7342  return alpha_procedure_type == PT_STACK ? 8 : 0;
7343}
7344
7345int
7346alpha_using_fp (void)
7347{
7348  alpha_sa_size ();
7349  return vms_unwind_regno == HARD_FRAME_POINTER_REGNUM;
7350}
7351
7352#if TARGET_ABI_OPEN_VMS
7353
7354const struct attribute_spec vms_attribute_table[] =
7355{
7356  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7357  { "overlaid",   0, 0, true,  false, false, NULL },
7358  { "global",     0, 0, true,  false, false, NULL },
7359  { "initialize", 0, 0, true,  false, false, NULL },
7360  { NULL,         0, 0, false, false, false, NULL }
7361};
7362
7363#endif
7364
7365static int
7366find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7367{
7368  return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7369}
7370
7371int
7372alpha_find_lo_sum_using_gp (rtx insn)
7373{
7374  return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7375}
7376
7377static int
7378alpha_does_function_need_gp (void)
7379{
7380  rtx insn;
7381
7382  /* The GP being variable is an OSF abi thing.  */
7383  if (! TARGET_ABI_OSF)
7384    return 0;
7385
7386  /* We need the gp to load the address of __mcount.  */
7387  if (TARGET_PROFILING_NEEDS_GP && current_function_profile)
7388    return 1;
7389
7390  /* The code emitted by alpha_output_mi_thunk_osf uses the gp.  */
7391  if (current_function_is_thunk)
7392    return 1;
7393
7394  /* The nonlocal receiver pattern assumes that the gp is valid for
7395     the nested function.  Reasonable because it's almost always set
7396     correctly already.  For the cases where that's wrong, make sure
7397     the nested function loads its gp on entry.  */
7398  if (current_function_has_nonlocal_goto)
7399    return 1;
7400
7401  /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7402     Even if we are a static function, we still need to do this in case
7403     our address is taken and passed to something like qsort.  */
7404
7405  push_topmost_sequence ();
7406  insn = get_insns ();
7407  pop_topmost_sequence ();
7408
7409  for (; insn; insn = NEXT_INSN (insn))
7410    if (INSN_P (insn)
7411	&& GET_CODE (PATTERN (insn)) != USE
7412	&& GET_CODE (PATTERN (insn)) != CLOBBER
7413	&& get_attr_usegp (insn))
7414      return 1;
7415
7416  return 0;
7417}
7418
7419
7420/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7421   sequences.  */
7422
7423static rtx
7424set_frame_related_p (void)
7425{
7426  rtx seq = get_insns ();
7427  rtx insn;
7428
7429  end_sequence ();
7430
7431  if (!seq)
7432    return NULL_RTX;
7433
7434  if (INSN_P (seq))
7435    {
7436      insn = seq;
7437      while (insn != NULL_RTX)
7438	{
7439	  RTX_FRAME_RELATED_P (insn) = 1;
7440	  insn = NEXT_INSN (insn);
7441	}
7442      seq = emit_insn (seq);
7443    }
7444  else
7445    {
7446      seq = emit_insn (seq);
7447      RTX_FRAME_RELATED_P (seq) = 1;
7448    }
7449  return seq;
7450}
7451
7452#define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
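
/* Typical use, as in the prologue code below: wrapping an emit, e.g.

     FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));

   starts a sequence, emits the move, and then set_frame_related_p ()
   ends the sequence, re-emits it and marks every resulting insn with
   RTX_FRAME_RELATED_P so dwarf2 unwind info is generated for it.  */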
7453
7454/* Generates a store with the proper unwind info attached.  VALUE is
7455   stored at BASE_REG+BASE_OFS.  If FRAME_BIAS is nonzero, then BASE_REG
7456   contains SP+FRAME_BIAS, and that is the unwind info that should be
7457   generated.  If FRAME_REG != VALUE, then VALUE is being stored on
7458   behalf of FRAME_REG, and FRAME_REG should be present in the unwind.  */
7459
7460static void
7461emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7462		    HOST_WIDE_INT base_ofs, rtx frame_reg)
7463{
7464  rtx addr, mem, insn;
7465
7466  addr = plus_constant (base_reg, base_ofs);
7467  mem = gen_rtx_MEM (DImode, addr);
7468  set_mem_alias_set (mem, alpha_sr_alias_set);
7469
7470  insn = emit_move_insn (mem, value);
7471  RTX_FRAME_RELATED_P (insn) = 1;
7472
7473  if (frame_bias || value != frame_reg)
7474    {
7475      if (frame_bias)
7476	{
7477	  addr = plus_constant (stack_pointer_rtx, frame_bias + base_ofs);
7478	  mem = gen_rtx_MEM (DImode, addr);
7479	}
7480
7481      REG_NOTES (insn)
7482	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7483			     gen_rtx_SET (VOIDmode, mem, frame_reg),
7484			     REG_NOTES (insn));
7485    }
7486}
7487
7488static void
7489emit_frame_store (unsigned int regno, rtx base_reg,
7490		  HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7491{
7492  rtx reg = gen_rtx_REG (DImode, regno);
7493  emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7494}
7495
7496/* Write function prologue.  */
7497
7498/* On vms we have two kinds of functions:
7499
7500   - stack frame (PROC_STACK)
7501	these are 'normal' functions with local vars and which are
7502	calling other functions
7503   - register frame (PROC_REGISTER)
7504	keeps all data in registers, needs no stack
7505
7506   We must pass this to the assembler so it can generate the
7507   proper pdsc (procedure descriptor).
7508   This is done with the '.pdesc' command.
7509
7510   On not-vms, we don't really differentiate between the two, as we can
7511   simply allocate stack without saving registers.  */
7512
7513void
7514alpha_expand_prologue (void)
7515{
7516  /* Registers to save.  */
7517  unsigned long imask = 0;
7518  unsigned long fmask = 0;
7519  /* Stack space needed for pushing registers clobbered by us.  */
7520  HOST_WIDE_INT sa_size;
7521  /* Complete stack size needed.  */
7522  HOST_WIDE_INT frame_size;
7523  /* Offset from base reg to register save area.  */
7524  HOST_WIDE_INT reg_offset;
7525  rtx sa_reg;
7526  int i;
7527
7528  sa_size = alpha_sa_size ();
7529
7530  frame_size = get_frame_size ();
7531  if (TARGET_ABI_OPEN_VMS)
7532    frame_size = ALPHA_ROUND (sa_size
7533			      + (alpha_procedure_type == PT_STACK ? 8 : 0)
7534			      + frame_size
7535			      + current_function_pretend_args_size);
7536  else if (TARGET_ABI_UNICOSMK)
7537    /* We have to allocate space for the DSIB if we generate a frame.  */
7538    frame_size = ALPHA_ROUND (sa_size
7539			      + (alpha_procedure_type == PT_STACK ? 48 : 0))
7540		 + ALPHA_ROUND (frame_size
7541				+ current_function_outgoing_args_size);
7542  else
7543    frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
7544		  + sa_size
7545		  + ALPHA_ROUND (frame_size
7546				 + current_function_pretend_args_size));
7547
7548  if (TARGET_ABI_OPEN_VMS)
7549    reg_offset = 8;
7550  else
7551    reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
7552
7553  alpha_sa_mask (&imask, &fmask);
7554
7555  /* Emit an insn to reload GP, if needed.  */
7556  if (TARGET_ABI_OSF)
7557    {
7558      alpha_function_needs_gp = alpha_does_function_need_gp ();
7559      if (alpha_function_needs_gp)
7560	emit_insn (gen_prologue_ldgp ());
7561    }
7562
7563  /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7564     the call to mcount ourselves, rather than having the linker do it
7565     magically in response to -pg.  Since _mcount has special linkage,
7566     don't represent the call as a call.  */
7567  if (TARGET_PROFILING_NEEDS_GP && current_function_profile)
7568    emit_insn (gen_prologue_mcount ());
7569
7570  if (TARGET_ABI_UNICOSMK)
7571    unicosmk_gen_dsib (&imask);
7572
7573  /* Adjust the stack by the frame size.  If the frame size is > 4096
7574     bytes, we need to be sure we probe somewhere in the first and last
7575     4096 bytes (we can probably get away without the latter test) and
7576     every 8192 bytes in between.  If the frame size is > 32768, we
7577     do this in a loop.  Otherwise, we generate the explicit probe
7578     instructions.
7579
7580     Note that we are only allowed to adjust sp once in the prologue.  */
7581
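  /* A worked example of the small-frame path below, with hypothetical
     sizes: for frame_size == 20000 and sa_size == 0 on OSF, probes are
     emitted at sp-4096 and sp-12288 (probed ends at 20480); since
     20000 > 20480 - 4096, the extra probe at sp-20000 is also emitted,
     and sp is then lowered by 20000 in a single adjustment.  */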
7582  if (frame_size <= 32768)
7583    {
7584      if (frame_size > 4096)
7585	{
7586	  int probed;
7587
7588	  for (probed = 4096; probed < frame_size; probed += 8192)
7589	    emit_insn (gen_probe_stack (GEN_INT (TARGET_ABI_UNICOSMK
7590						 ? -probed + 64
7591						 : -probed)));
7592
7593	  /* We only have to do this probe if we aren't saving registers.  */
7594	  if (sa_size == 0 && frame_size > probed - 4096)
7595	    emit_insn (gen_probe_stack (GEN_INT (-frame_size)));
7596	}
7597
7598      if (frame_size != 0)
7599	FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7600				    GEN_INT (TARGET_ABI_UNICOSMK
7601					     ? -frame_size + 64
7602					     : -frame_size))));
7603    }
7604  else
7605    {
7606      /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7607	 number of 8192 byte blocks to probe.  We then probe each block
7608	 in the loop and then set SP to the proper location.  If the
7609	 amount remaining is > 4096, we have to do one more probe if we
7610	 are not saving any registers.  */
7611
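      /* For illustration, with the hypothetical frame_size == 100000:
	 blocks = (100000 + 4096) / 8192 = 12 and leftover = 104096
	 - 12*8192 = 5792.  Assuming the probe loop steps PTR down by
	 8192 per probe from sp+4096, it ends at sp+4096-98304, and the
	 final add of -leftover leaves sp exactly 100000 bytes lower.  */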
7612      HOST_WIDE_INT blocks = (frame_size + 4096) / 8192;
7613      HOST_WIDE_INT leftover = frame_size + 4096 - blocks * 8192;
7614      rtx ptr = gen_rtx_REG (DImode, 22);
7615      rtx count = gen_rtx_REG (DImode, 23);
7616      rtx seq;
7617
7618      emit_move_insn (count, GEN_INT (blocks));
7619      emit_insn (gen_adddi3 (ptr, stack_pointer_rtx,
7620			     GEN_INT (TARGET_ABI_UNICOSMK ? 4096 - 64 : 4096)));
7621
7622      /* Because of the difficulty in emitting a new basic block this
7623	 late in the compilation, generate the loop as a single insn.  */
7624      emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7625
7626      if (leftover > 4096 && sa_size == 0)
7627	{
7628	  rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
7629	  MEM_VOLATILE_P (last) = 1;
7630	  emit_move_insn (last, const0_rtx);
7631	}
7632
7633      if (TARGET_ABI_WINDOWS_NT)
7634	{
7635	  /* For NT stack unwind (done by 'reverse execution'), it's
7636	     not OK to take the result of a loop, even though the value
7637	     is already in ptr, so we reload it via a single operation
7638	     and subtract it from sp.
7639
7640	     Yes, that's correct -- we have to reload the whole constant
7641	     into a temporary via ldah+lda then subtract from sp.  */
7642
7643	  HOST_WIDE_INT lo, hi;
7644	  lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7645	  hi = frame_size - lo;
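
	  /* A sketch of the split with a hypothetical value: for
	     frame_size == 0x123c000 the low 16 bits are 0xc000, so
	     lo = ((0xc000 ^ 0x8000) - 0x8000) = -0x4000 and
	     hi = 0x123c000 + 0x4000 = 0x1240000.  HI is a multiple of
	     0x10000 that ldah can materialize directly, and lda then
	     adds the sign-extended LO back in.  */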
7646
7647	  emit_move_insn (ptr, GEN_INT (hi));
7648	  emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7649	  seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7650				       ptr));
7651	}
7652      else
7653	{
7654	  seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7655				       GEN_INT (-leftover)));
7656	}
7657
7658      /* This alternative is special, because the DWARF code cannot
7659         possibly intuit through the loop above.  So we invent this
7660         note for it to look at instead.  */
7661      RTX_FRAME_RELATED_P (seq) = 1;
7662      REG_NOTES (seq)
7663        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7664			     gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7665			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7666					     GEN_INT (TARGET_ABI_UNICOSMK
7667						      ? -frame_size + 64
7668						      : -frame_size))),
7669			     REG_NOTES (seq));
7670    }
7671
7672  if (!TARGET_ABI_UNICOSMK)
7673    {
7674      HOST_WIDE_INT sa_bias = 0;
7675
7676      /* Cope with very large offsets to the register save area.  */
7677      sa_reg = stack_pointer_rtx;
7678      if (reg_offset + sa_size > 0x8000)
7679	{
7680	  int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7681	  rtx sa_bias_rtx;
7682
7683	  if (low + sa_size <= 0x8000)
7684	    sa_bias = reg_offset - low, reg_offset = low;
7685	  else
7686	    sa_bias = reg_offset, reg_offset = 0;
7687
7688	  sa_reg = gen_rtx_REG (DImode, 24);
7689	  sa_bias_rtx = GEN_INT (sa_bias);
7690
7691	  if (add_operand (sa_bias_rtx, DImode))
7692	    emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7693	  else
7694	    {
7695	      emit_move_insn (sa_reg, sa_bias_rtx);
7696	      emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7697	    }
7698	}
7699
7700      /* Save regs in stack order.  Beginning with VMS PV.  */
7701      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7702	emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7703
7704      /* Save register RA next.  */
7705      if (imask & (1UL << REG_RA))
7706	{
7707	  emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7708	  imask &= ~(1UL << REG_RA);
7709	  reg_offset += 8;
7710	}
7711
7712      /* Now save any other registers required to be saved.  */
7713      for (i = 0; i < 31; i++)
7714	if (imask & (1UL << i))
7715	  {
7716	    emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7717	    reg_offset += 8;
7718	  }
7719
7720      for (i = 0; i < 31; i++)
7721	if (fmask & (1UL << i))
7722	  {
7723	    emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7724	    reg_offset += 8;
7725	  }
7726    }
7727  else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK)
7728    {
7729      /* The standard frame on the T3E includes space for saving registers.
7730	 We just have to use it. We don't have to save the return address and
7731	 the old frame pointer here - they are saved in the DSIB.  */
7732
7733      reg_offset = -56;
7734      for (i = 9; i < 15; i++)
7735	if (imask & (1UL << i))
7736	  {
7737	    emit_frame_store (i, hard_frame_pointer_rtx, 0, reg_offset);
7738	    reg_offset -= 8;
7739	  }
7740      for (i = 2; i < 10; i++)
7741	if (fmask & (1UL << i))
7742	  {
7743	    emit_frame_store (i+32, hard_frame_pointer_rtx, 0, reg_offset);
7744	    reg_offset -= 8;
7745	  }
7746    }
7747
7748  if (TARGET_ABI_OPEN_VMS)
7749    {
7750      if (alpha_procedure_type == PT_REGISTER)
7751	/* Register frame procedures save the fp.
7752	   ?? Ought to have a dwarf2 save for this.  */
7753	emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7754			hard_frame_pointer_rtx);
7755
7756      if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7757	emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7758				    gen_rtx_REG (DImode, REG_PV)));
7759
7760      if (alpha_procedure_type != PT_NULL
7761	  && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7762	FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7763
7764      /* If we have to allocate space for outgoing args, do it now.  */
7765      if (current_function_outgoing_args_size != 0)
7766	{
7767	  rtx seq
7768	    = emit_move_insn (stack_pointer_rtx,
7769			      plus_constant
7770			      (hard_frame_pointer_rtx,
7771			       - (ALPHA_ROUND
7772				  (current_function_outgoing_args_size))));
7773
7774	  /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7775	     if ! frame_pointer_needed. Setting the bit will change the CFA
7776	     computation rule to use sp again, which would be wrong if we had
7777	     frame_pointer_needed, as this means sp might move unpredictably
7778	     later on.
7779
7780	     Also, note that
7781	       frame_pointer_needed
7782	       => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7783	     and
7784	       current_function_outgoing_args_size != 0
7785	       => alpha_procedure_type != PT_NULL,
7786
7787	     so when we are not setting the bit here, we are guaranteed to
7788	     have emitted an FRP frame pointer update just before.  */
7789	  RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7790	}
7791    }
7792  else if (!TARGET_ABI_UNICOSMK)
7793    {
7794      /* If we need a frame pointer, set it from the stack pointer.  */
7795      if (frame_pointer_needed)
7796	{
7797	  if (TARGET_CAN_FAULT_IN_PROLOGUE)
7798	    FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7799	  else
7800	    /* This must always be the last instruction in the
7801	       prologue, thus we emit a special move + clobber.  */
7802	      FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7803				           stack_pointer_rtx, sa_reg)));
7804	}
7805    }
7806
7807  /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7808     the prologue, for exception handling reasons, we cannot do this for
7809     any insn that might fault.  We could prevent this for mems with a
7810     (clobber:BLK (scratch)), but this doesn't work for fp insns.  So we
7811     have to prevent all such scheduling with a blockage.
7812
7813     Linux, on the other hand, never bothered to implement OSF/1's
7814     exception handling, and so doesn't care about such things.  Anyone
7815     planning to use dwarf2 frame-unwind info can also omit the blockage.  */
7816
7817  if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7818    emit_insn (gen_blockage ());
7819}
7820
7821/* Count the number of .file directives, so that .loc is up to date.  */
7822int num_source_filenames = 0;
7823
7824/* Output the textual info surrounding the prologue.  */
7825
7826void
7827alpha_start_function (FILE *file, const char *fnname,
7828		      tree decl ATTRIBUTE_UNUSED)
7829{
7830  unsigned long imask = 0;
7831  unsigned long fmask = 0;
7832  /* Stack space needed for pushing registers clobbered by us.  */
7833  HOST_WIDE_INT sa_size;
7834  /* Complete stack size needed.  */
7835  unsigned HOST_WIDE_INT frame_size;
7836  /* Offset from base reg to register save area.  */
7837  HOST_WIDE_INT reg_offset;
7838  char *entry_label = (char *) alloca (strlen (fnname) + 6);
7839  int i;
7840
7841  /* Don't emit an extern directive for functions defined in the same file.  */
7842  if (TARGET_ABI_UNICOSMK)
7843    {
7844      tree name_tree;
7845      name_tree = get_identifier (fnname);
7846      TREE_ASM_WRITTEN (name_tree) = 1;
7847    }
7848
7849  alpha_fnname = fnname;
7850  sa_size = alpha_sa_size ();
7851
7852  frame_size = get_frame_size ();
7853  if (TARGET_ABI_OPEN_VMS)
7854    frame_size = ALPHA_ROUND (sa_size
7855			      + (alpha_procedure_type == PT_STACK ? 8 : 0)
7856			      + frame_size
7857			      + current_function_pretend_args_size);
7858  else if (TARGET_ABI_UNICOSMK)
7859    frame_size = ALPHA_ROUND (sa_size
7860			      + (alpha_procedure_type == PT_STACK ? 48 : 0))
7861		 + ALPHA_ROUND (frame_size
7862			      + current_function_outgoing_args_size);
7863  else
7864    frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
7865		  + sa_size
7866		  + ALPHA_ROUND (frame_size
7867				 + current_function_pretend_args_size));
7868
7869  if (TARGET_ABI_OPEN_VMS)
7870    reg_offset = 8;
7871  else
7872    reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
7873
7874  alpha_sa_mask (&imask, &fmask);
7875
7876  /* Ecoff can handle multiple .file directives, so put out file and lineno.
7877     We have to do that before the .ent directive as we cannot switch
7878     files within procedures with native ecoff because line numbers are
7879     linked to procedure descriptors.
7880     Outputting the lineno helps debugging of one line functions as they
7881     would otherwise get no line number at all. Please note that we would
7882     like to put out last_linenum from final.c, but it is not accessible.  */
7883
7884  if (write_symbols == SDB_DEBUG)
7885    {
7886#ifdef ASM_OUTPUT_SOURCE_FILENAME
7887      ASM_OUTPUT_SOURCE_FILENAME (file,
7888				  DECL_SOURCE_FILE (current_function_decl));
7889#endif
7890#ifdef SDB_OUTPUT_SOURCE_LINE
7891      if (debug_info_level != DINFO_LEVEL_TERSE)
7892        SDB_OUTPUT_SOURCE_LINE (file,
7893				DECL_SOURCE_LINE (current_function_decl));
7894#endif
7895    }
7896
7897  /* Issue function start and label.  */
7898  if (TARGET_ABI_OPEN_VMS
7899      || (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive))
7900    {
7901      fputs ("\t.ent ", file);
7902      assemble_name (file, fnname);
7903      putc ('\n', file);
7904
7905      /* If the function needs GP, we'll write the "..ng" label there.
7906	 Otherwise, do it here.  */
7907      if (TARGET_ABI_OSF
7908          && ! alpha_function_needs_gp
7909	  && ! current_function_is_thunk)
7910	{
7911	  putc ('$', file);
7912	  assemble_name (file, fnname);
7913	  fputs ("..ng:\n", file);
7914	}
7915    }
7916
7917  strcpy (entry_label, fnname);
7918  if (TARGET_ABI_OPEN_VMS)
7919    strcat (entry_label, "..en");
7920
7921  /* For public functions, the label must be globalized by appending an
7922     additional colon.  */
7923  if (TARGET_ABI_UNICOSMK && TREE_PUBLIC (decl))
7924    strcat (entry_label, ":");
7925
7926  ASM_OUTPUT_LABEL (file, entry_label);
7927  inside_function = TRUE;
7928
7929  if (TARGET_ABI_OPEN_VMS)
7930    fprintf (file, "\t.base $%d\n", vms_base_regno);
7931
7932  if (!TARGET_ABI_OPEN_VMS && !TARGET_ABI_UNICOSMK && TARGET_IEEE_CONFORMANT
7933      && !flag_inhibit_size_directive)
7934    {
7935      /* Set flags in procedure descriptor to request IEEE-conformant
7936	 math-library routines.  The value we set it to is PDSC_EXC_IEEE
7937	 (/usr/include/pdsc.h).  */
7938      fputs ("\t.eflag 48\n", file);
7939    }
7940
7941  /* Set up offsets to alpha virtual arg/local debugging pointer.  */
7942  alpha_auto_offset = -frame_size + current_function_pretend_args_size;
7943  alpha_arg_offset = -frame_size + 48;
7944
7945  /* Describe our frame.  If the frame size is larger than an integer,
7946     print it as zero to avoid an assembler error.  We won't be
7947     properly describing such a frame, but that's the best we can do.  */
7948  if (TARGET_ABI_UNICOSMK)
7949    ;
7950  else if (TARGET_ABI_OPEN_VMS)
7951    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
7952	     HOST_WIDE_INT_PRINT_DEC "\n",
7953	     vms_unwind_regno,
7954	     frame_size >= (1UL << 31) ? 0 : frame_size,
7955	     reg_offset);
7956  else if (!flag_inhibit_size_directive)
7957    fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
7958	     (frame_pointer_needed
7959	      ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
7960	     frame_size >= (1UL << 31) ? 0 : frame_size,
7961	     current_function_pretend_args_size);
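
  /* With hypothetical numbers, a non-VMS function with a 96-byte frame,
     no frame pointer and no pretend args would get ".frame $30,96,$26,0"
     here ($30 being the stack pointer and $26 the return address).  */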
7962
7963  /* Describe which registers were spilled.  */
7964  if (TARGET_ABI_UNICOSMK)
7965    ;
7966  else if (TARGET_ABI_OPEN_VMS)
7967    {
7968      if (imask)
7969        /* ??? Does VMS care if mask contains ra?  The old code didn't
7970           set it, so I don't here.  */
7971	fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
7972      if (fmask)
7973	fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
7974      if (alpha_procedure_type == PT_REGISTER)
7975	fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
7976    }
7977  else if (!flag_inhibit_size_directive)
7978    {
7979      if (imask)
7980	{
7981	  fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
7982		   frame_size >= (1UL << 31) ? 0 : reg_offset - frame_size);
7983
7984	  for (i = 0; i < 32; ++i)
7985	    if (imask & (1UL << i))
7986	      reg_offset += 8;
7987	}
7988
7989      if (fmask)
7990	fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
7991		 frame_size >= (1UL << 31) ? 0 : reg_offset - frame_size);
7992    }
7993
7994#if TARGET_ABI_OPEN_VMS
7995  /* Ifdef'ed because link_section is only available then.  */
7996  readonly_data_section ();
7997  fprintf (file, "\t.align 3\n");
7998  assemble_name (file, fnname); fputs ("..na:\n", file);
7999  fputs ("\t.ascii \"", file);
8000  assemble_name (file, fnname);
8001  fputs ("\\0\"\n", file);
8002  alpha_need_linkage (fnname, 1);
8003  text_section ();
8004#endif
8005}
8006
8007/* Emit the .prologue note at the scheduled end of the prologue.  */
8008
8009static void
8010alpha_output_function_end_prologue (FILE *file)
8011{
8012  if (TARGET_ABI_UNICOSMK)
8013    ;
8014  else if (TARGET_ABI_OPEN_VMS)
8015    fputs ("\t.prologue\n", file);
8016  else if (TARGET_ABI_WINDOWS_NT)
8017    fputs ("\t.prologue 0\n", file);
8018  else if (!flag_inhibit_size_directive)
8019    fprintf (file, "\t.prologue %d\n",
8020	     alpha_function_needs_gp || current_function_is_thunk);
8021}
8022
8023/* Write function epilogue.  */
8024
8025/* ??? At some point we will want to support full unwind, and so will
8026   need to mark the epilogue as well.  At the moment, we just confuse
8027   dwarf2out.  */
8028#undef FRP
8029#define FRP(exp) exp
8030
8031void
8032alpha_expand_epilogue (void)
8033{
8034  /* Registers to save.  */
8035  unsigned long imask = 0;
8036  unsigned long fmask = 0;
8037  /* Stack space needed for pushing registers clobbered by us.  */
8038  HOST_WIDE_INT sa_size;
8039  /* Complete stack size needed.  */
8040  HOST_WIDE_INT frame_size;
8041  /* Offset from base reg to register save area.  */
8042  HOST_WIDE_INT reg_offset;
8043  int fp_is_frame_pointer, fp_offset;
8044  rtx sa_reg, sa_reg_exp = NULL;
8045  rtx sp_adj1, sp_adj2, mem;
8046  rtx eh_ofs;
8047  int i;
8048
8049  sa_size = alpha_sa_size ();
8050
8051  frame_size = get_frame_size ();
8052  if (TARGET_ABI_OPEN_VMS)
8053    frame_size = ALPHA_ROUND (sa_size
8054			      + (alpha_procedure_type == PT_STACK ? 8 : 0)
8055			      + frame_size
8056			      + current_function_pretend_args_size);
8057  else if (TARGET_ABI_UNICOSMK)
8058    frame_size = ALPHA_ROUND (sa_size
8059			      + (alpha_procedure_type == PT_STACK ? 48 : 0))
8060		 + ALPHA_ROUND (frame_size
8061			      + current_function_outgoing_args_size);
8062  else
8063    frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
8064		  + sa_size
8065		  + ALPHA_ROUND (frame_size
8066				 + current_function_pretend_args_size));
8067
8068  if (TARGET_ABI_OPEN_VMS)
8069    {
8070       if (alpha_procedure_type == PT_STACK)
8071          reg_offset = 8;
8072       else
8073          reg_offset = 0;
8074    }
8075  else
8076    reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
8077
8078  alpha_sa_mask (&imask, &fmask);
8079
8080  fp_is_frame_pointer
8081    = ((TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
8082       || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed));
8083  fp_offset = 0;
8084  sa_reg = stack_pointer_rtx;
8085
8086  if (current_function_calls_eh_return)
8087    eh_ofs = EH_RETURN_STACKADJ_RTX;
8088  else
8089    eh_ofs = NULL_RTX;
8090
8091  if (!TARGET_ABI_UNICOSMK && sa_size)
8092    {
8093      /* If we have a frame pointer, restore SP from it.  */
8094      if ((TARGET_ABI_OPEN_VMS
8095	   && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
8096	  || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed))
8097	FRP (emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx));
8098
8099      /* Cope with very large offsets to the register save area.  */
8100      if (reg_offset + sa_size > 0x8000)
8101	{
8102	  int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8103	  HOST_WIDE_INT bias;
8104
8105	  if (low + sa_size <= 0x8000)
8106	    bias = reg_offset - low, reg_offset = low;
8107	  else
8108	    bias = reg_offset, reg_offset = 0;
8109
8110	  sa_reg = gen_rtx_REG (DImode, 22);
8111	  sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
8112
8113	  FRP (emit_move_insn (sa_reg, sa_reg_exp));
8114	}
8115
8116      /* Restore registers in order, excepting a true frame pointer.  */
8117
8118      mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
8119      if (! eh_ofs)
8120        set_mem_alias_set (mem, alpha_sr_alias_set);
8121      FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
8122
8123      reg_offset += 8;
8124      imask &= ~(1UL << REG_RA);
8125
8126      for (i = 0; i < 31; ++i)
8127	if (imask & (1UL << i))
8128	  {
8129	    if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8130	      fp_offset = reg_offset;
8131	    else
8132	      {
8133		mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
8134		set_mem_alias_set (mem, alpha_sr_alias_set);
8135		FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
8136	      }
8137	    reg_offset += 8;
8138	  }
8139
8140      for (i = 0; i < 31; ++i)
8141	if (fmask & (1UL << i))
8142	  {
8143	    mem = gen_rtx_MEM (DFmode, plus_constant (sa_reg, reg_offset));
8144	    set_mem_alias_set (mem, alpha_sr_alias_set);
8145	    FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
8146	    reg_offset += 8;
8147	  }
8148    }
8149  else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK)
8150    {
8151      /* Restore callee-saved general-purpose registers.  */
8152
8153      reg_offset = -56;
8154
8155      for (i = 9; i < 15; i++)
8156	if (imask & (1UL << i))
8157	  {
8158	    mem = gen_rtx_MEM (DImode, plus_constant (hard_frame_pointer_rtx,
8159						     reg_offset));
8160	    set_mem_alias_set (mem, alpha_sr_alias_set);
8161	    FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
8162	    reg_offset -= 8;
8163	  }
8164
8165      for (i = 2; i < 10; i++)
8166	if (fmask & (1UL << i))
8167	  {
8168	    mem = gen_rtx_MEM (DFmode, plus_constant (hard_frame_pointer_rtx,
8169						     reg_offset));
8170	    set_mem_alias_set (mem, alpha_sr_alias_set);
8171	    FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
8172	    reg_offset -= 8;
8173	  }
8174
8175      /* Restore the return address from the DSIB.  */
8176
8177      mem = gen_rtx_MEM (DImode, plus_constant (hard_frame_pointer_rtx, -8));
8178      set_mem_alias_set (mem, alpha_sr_alias_set);
8179      FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
8180    }
8181
8182  if (frame_size || eh_ofs)
8183    {
8184      sp_adj1 = stack_pointer_rtx;
8185
8186      if (eh_ofs)
8187	{
8188	  sp_adj1 = gen_rtx_REG (DImode, 23);
8189	  emit_move_insn (sp_adj1,
8190			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8191	}
8192
8193      /* If the stack size is large, begin computation into a temporary
8194	 register so as not to interfere with a potential fp restore,
8195	 which must be consecutive with an SP restore.  */
8196      if (frame_size < 32768
8197	  && ! (TARGET_ABI_UNICOSMK && current_function_calls_alloca))
8198	sp_adj2 = GEN_INT (frame_size);
8199      else if (TARGET_ABI_UNICOSMK)
8200	{
8201	  sp_adj1 = gen_rtx_REG (DImode, 23);
8202	  FRP (emit_move_insn (sp_adj1, hard_frame_pointer_rtx));
8203	  sp_adj2 = const0_rtx;
8204	}
8205      else if (frame_size < 0x40007fffL)
8206	{
8207	  int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8208
8209	  sp_adj2 = plus_constant (sp_adj1, frame_size - low);
8210	  if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8211	    sp_adj1 = sa_reg;
8212	  else
8213	    {
8214	      sp_adj1 = gen_rtx_REG (DImode, 23);
8215	      FRP (emit_move_insn (sp_adj1, sp_adj2));
8216	    }
8217	  sp_adj2 = GEN_INT (low);
8218	}
8219      else
8220	{
8221	  rtx tmp = gen_rtx_REG (DImode, 23);
8222	  FRP (sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size,
8223					       3, false));
8224	  if (!sp_adj2)
8225	    {
8226	      /* We can't drop new things to memory this late, afaik,
8227		 so build it up by pieces.  */
8228	      FRP (sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8229							-(frame_size < 0)));
8230	      gcc_assert (sp_adj2);
8231	    }
8232	}
8233
8234      /* From now on, things must be in order.  So emit blockages.  */
8235
8236      /* Restore the frame pointer.  */
8237      if (TARGET_ABI_UNICOSMK)
8238	{
8239	  emit_insn (gen_blockage ());
8240	  mem = gen_rtx_MEM (DImode,
8241			     plus_constant (hard_frame_pointer_rtx, -16));
8242	  set_mem_alias_set (mem, alpha_sr_alias_set);
8243	  FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
8244	}
8245      else if (fp_is_frame_pointer)
8246	{
8247	  emit_insn (gen_blockage ());
8248	  mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, fp_offset));
8249	  set_mem_alias_set (mem, alpha_sr_alias_set);
8250	  FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
8251	}
8252      else if (TARGET_ABI_OPEN_VMS)
8253	{
8254	  emit_insn (gen_blockage ());
8255	  FRP (emit_move_insn (hard_frame_pointer_rtx,
8256			       gen_rtx_REG (DImode, vms_save_fp_regno)));
8257	}
8258
8259      /* Restore the stack pointer.  */
8260      emit_insn (gen_blockage ());
8261      if (sp_adj2 == const0_rtx)
8262	FRP (emit_move_insn (stack_pointer_rtx, sp_adj1));
8263      else
8264	FRP (emit_move_insn (stack_pointer_rtx,
8265			     gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)));
8266    }
8267  else
8268    {
8269      if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8270        {
8271          emit_insn (gen_blockage ());
8272          FRP (emit_move_insn (hard_frame_pointer_rtx,
8273			       gen_rtx_REG (DImode, vms_save_fp_regno)));
8274        }
8275      else if (TARGET_ABI_UNICOSMK && alpha_procedure_type != PT_STACK)
8276	{
8277	  /* Decrement the frame pointer if the function does not have a
8278	     frame.  */
8279
8280	  emit_insn (gen_blockage ());
8281	  FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
8282				      hard_frame_pointer_rtx, constm1_rtx)));
8283        }
8284    }
8285}
8286
8287/* Output the rest of the textual info surrounding the epilogue.  */
8288
8289void
8290alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8291{
8292#if TARGET_ABI_OPEN_VMS
8293  alpha_write_linkage (file, fnname, decl);
8294#endif
8295
8296  /* End the function.  */
8297  if (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive)
8298    {
8299      fputs ("\t.end ", file);
8300      assemble_name (file, fnname);
8301      putc ('\n', file);
8302    }
8303  inside_function = FALSE;
8304
8305  /* Output jump tables and the static subroutine information block.  */
8306  if (TARGET_ABI_UNICOSMK)
8307    {
8308      unicosmk_output_ssib (file, fnname);
8309      unicosmk_output_deferred_case_vectors (file);
8310    }
8311}
8312
8313#if TARGET_ABI_OSF
8314/* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8315
8316   In order to avoid the hordes of differences between generated code
8317   with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8318   lots of code loading up large constants, generate rtl and emit it
8319   instead of going straight to text.
8320
8321   Not sure why this idea hasn't been explored before...  */
8322
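/* Roughly, the thunk built below behaves like this illustrative C
   sketch (the real code is emitted as rtl and tail-calls FUNCTION
   instead of returning through here):

     this += delta;
     if (vcall_offset)
       this += *(long *) (*(long *) this + vcall_offset);
     return function (this, ...);  */
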
8323static void
8324alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8325			   HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8326			   tree function)
8327{
8328  HOST_WIDE_INT hi, lo;
8329  rtx this, insn, funexp;
8330
8331  reset_block_changes ();
8332
8333  /* We always require a valid GP.  */
8334  emit_insn (gen_prologue_ldgp ());
8335  emit_note (NOTE_INSN_PROLOGUE_END);
8336
8337  /* Find the "this" pointer.  If the function returns a structure,
8338     the structure return pointer is in $16.  */
8339  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8340    this = gen_rtx_REG (Pmode, 17);
8341  else
8342    this = gen_rtx_REG (Pmode, 16);
8343
8344  /* Add DELTA.  When possible we use ldah+lda.  Otherwise load the
8345     entire constant for the add.  */
8346  lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8347  hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8348  if (hi + lo == delta)
8349    {
8350      if (hi)
8351	emit_insn (gen_adddi3 (this, this, GEN_INT (hi)));
8352      if (lo)
8353	emit_insn (gen_adddi3 (this, this, GEN_INT (lo)));
8354    }
8355  else
8356    {
8357      rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8358					   delta, -(delta < 0));
8359      emit_insn (gen_adddi3 (this, this, tmp));
8360    }
8361
8362  /* Add a delta stored in the vtable at VCALL_OFFSET.  */
8363  if (vcall_offset)
8364    {
8365      rtx tmp, tmp2;
8366
8367      tmp = gen_rtx_REG (Pmode, 0);
8368      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8369
8370      lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8371      hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8372      if (hi + lo == vcall_offset)
8373	{
8374	  if (hi)
8375	    emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8376	}
8377      else
8378	{
8379	  tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8380					    vcall_offset, -(vcall_offset < 0));
8381          emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8382	  lo = 0;
8383	}
8384      if (lo)
8385	tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8386      else
8387	tmp2 = tmp;
8388      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8389
8390      emit_insn (gen_adddi3 (this, this, tmp));
8391    }
8392
8393  /* Generate a tail call to the target function.  */
8394  if (! TREE_USED (function))
8395    {
8396      assemble_external (function);
8397      TREE_USED (function) = 1;
8398    }
8399  funexp = XEXP (DECL_RTL (function), 0);
8400  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8401  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8402  SIBLING_CALL_P (insn) = 1;
8403
8404  /* Run just enough of rest_of_compilation to get the insns emitted.
8405     There's not really enough bulk here to make other passes such as
8406     instruction scheduling worthwhile.  Note that use_thunk calls
8407     assemble_start_function and assemble_end_function.  */
8408  insn = get_insns ();
8409  insn_locators_initialize ();
8410  shorten_branches (insn);
8411  final_start_function (insn, file, 1);
8412  final (insn, file, 1);
8413  final_end_function ();
8414}
8415#endif /* TARGET_ABI_OSF */
8416
8417/* Debugging support.  */
8418
8419#include "gstab.h"
8420
8421/* Count the number of sdb-related labels that are generated (to find
8422   block start and end boundaries).  */
8423
8424int sdb_label_count = 0;
8425
8426/* Name of the file containing the current function.  */
8427
8428static const char *current_function_file = "";
8429
8430/* Offsets to alpha virtual arg/local debugging pointers.  */
8431
8432long alpha_arg_offset;
8433long alpha_auto_offset;
8434
8435/* Emit a new filename to a stream.  */
8436
8437void
8438alpha_output_filename (FILE *stream, const char *name)
8439{
8440  static int first_time = TRUE;
8441
8442  if (first_time)
8443    {
8444      first_time = FALSE;
8445      ++num_source_filenames;
8446      current_function_file = name;
8447      fprintf (stream, "\t.file\t%d ", num_source_filenames);
8448      output_quoted_string (stream, name);
8449      fprintf (stream, "\n");
8450      if (!TARGET_GAS && write_symbols == DBX_DEBUG)
8451	fprintf (stream, "\t#@stabs\n");
8452    }
8453
8454  else if (write_symbols == DBX_DEBUG)
8455    /* dbxout.c will emit an appropriate .stabs directive.  */
8456    return;
8457
8458  else if (name != current_function_file
8459	   && strcmp (name, current_function_file) != 0)
8460    {
8461      if (inside_function && ! TARGET_GAS)
8462	fprintf (stream, "\t#.file\t%d ", num_source_filenames);
8463      else
8464	{
8465	  ++num_source_filenames;
8466	  current_function_file = name;
8467	  fprintf (stream, "\t.file\t%d ", num_source_filenames);
8468	}
8469
8470      output_quoted_string (stream, name);
8471      fprintf (stream, "\n");
8472    }
8473}
8474
8475/* Structure to show the current status of registers and memory.  */
8476
8477struct shadow_summary
8478{
8479  struct {
8480    unsigned int i     : 31;	/* Mask of int regs */
8481    unsigned int fp    : 31;	/* Mask of fp regs */
8482    unsigned int mem   :  1;	/* mem == imem | fpmem */
8483  } used, defd;
8484};
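
/* The masks are indexed by hard regno modulo 32, matching summarize_insn
   below: a use of $9 sets bit 9 in used.i, while a use of $f2 (hard
   regno 34) sets bit 2 in used.fp.  Registers 31 and 63 ($31 and $f31,
   which always read as zero) are never recorded.  */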
8485
8486/* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8487   to the summary structure.  SET is nonzero if the insn is setting the
8488   object, otherwise zero.  */
8489
8490static void
8491summarize_insn (rtx x, struct shadow_summary *sum, int set)
8492{
8493  const char *format_ptr;
8494  int i, j;
8495
8496  if (x == 0)
8497    return;
8498
8499  switch (GET_CODE (x))
8500    {
8501      /* ??? Note that this case would be incorrect if the Alpha had a
8502	 ZERO_EXTRACT in SET_DEST.  */
8503    case SET:
8504      summarize_insn (SET_SRC (x), sum, 0);
8505      summarize_insn (SET_DEST (x), sum, 1);
8506      break;
8507
8508    case CLOBBER:
8509      summarize_insn (XEXP (x, 0), sum, 1);
8510      break;
8511
8512    case USE:
8513      summarize_insn (XEXP (x, 0), sum, 0);
8514      break;
8515
8516    case ASM_OPERANDS:
8517      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8518	summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8519      break;
8520
8521    case PARALLEL:
8522      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8523	summarize_insn (XVECEXP (x, 0, i), sum, 0);
8524      break;
8525
8526    case SUBREG:
8527      summarize_insn (SUBREG_REG (x), sum, 0);
8528      break;
8529
8530    case REG:
8531      {
8532	int regno = REGNO (x);
8533	unsigned long mask = ((unsigned long) 1) << (regno % 32);
8534
8535	if (regno == 31 || regno == 63)
8536	  break;
8537
8538	if (set)
8539	  {
8540	    if (regno < 32)
8541	      sum->defd.i |= mask;
8542	    else
8543	      sum->defd.fp |= mask;
8544	  }
8545	else
8546	  {
8547	    if (regno < 32)
8548	      sum->used.i  |= mask;
8549	    else
8550	      sum->used.fp |= mask;
8551	  }
8552	}
8553      break;
8554
8555    case MEM:
8556      if (set)
8557	sum->defd.mem = 1;
8558      else
8559	sum->used.mem = 1;
8560
8561      /* Find the regs used in memory address computation: */
8562      summarize_insn (XEXP (x, 0), sum, 0);
8563      break;
8564
8565    case CONST_INT:   case CONST_DOUBLE:
8566    case SYMBOL_REF:  case LABEL_REF:     case CONST:
8567    case SCRATCH:     case ASM_INPUT:
8568      break;
8569
8570      /* Handle common unary and binary ops for efficiency.  */
8571    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
8572    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
8573    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
8574    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
8575    case NE:       case EQ:      case GE:      case GT:        case LE:
8576    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
8577      summarize_insn (XEXP (x, 0), sum, 0);
8578      summarize_insn (XEXP (x, 1), sum, 0);
8579      break;
8580
8581    case NEG:  case NOT:  case SIGN_EXTEND:  case ZERO_EXTEND:
8582    case TRUNCATE:  case FLOAT_EXTEND:  case FLOAT_TRUNCATE:  case FLOAT:
8583    case FIX:  case UNSIGNED_FLOAT:  case UNSIGNED_FIX:  case ABS:
8584    case SQRT:  case FFS:
8585      summarize_insn (XEXP (x, 0), sum, 0);
8586      break;
8587
8588    default:
8589      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8590      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8591	switch (format_ptr[i])
8592	  {
8593	  case 'e':
8594	    summarize_insn (XEXP (x, i), sum, 0);
8595	    break;
8596
8597	  case 'E':
8598	    for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8599	      summarize_insn (XVECEXP (x, i, j), sum, 0);
8600	    break;
8601
8602	  case 'i':
8603	    break;
8604
8605	  default:
8606	    gcc_unreachable ();
8607	  }
8608    }
8609}
8610
8611/* Ensure a sufficient number of `trapb' insns are in the code when
8612   the user requests code with a trap precision of functions or
8613   instructions.
8614
8615   In naive mode, when the user requests a trap-precision of
8616   "instruction", a trapb is needed after every instruction that may
8617   generate a trap.  This ensures that the code is resumption safe but
8618   it is also slow.
8619
8620   When optimizations are turned on, we delay issuing a trapb as long
8621   as possible.  In this context, a trap shadow is the sequence of
8622   instructions that starts with a (potentially) trap generating
8623   instruction and extends to the next trapb or call_pal instruction
8624   (but GCC never generates call_pal by itself).  We can delay (and
8625   therefore sometimes omit) a trapb subject to the following
8626   conditions:
8627
8628   (a) On entry to the trap shadow, if any Alpha register or memory
8629   location contains a value that is used as an operand value by some
8630   instruction in the trap shadow (live on entry), then no instruction
8631   in the trap shadow may modify the register or memory location.
8632
8633   (b) Within the trap shadow, the computation of the base register
8634   for a memory load or store instruction may not involve using the
8635   result of an instruction that might generate an UNPREDICTABLE
8636   result.
8637
8638   (c) Within the trap shadow, no register may be used more than once
8639   as a destination register.  (This is to make life easier for the
8640   trap-handler.)
8641
8642   (d) The trap shadow may not include any branch instructions.  */
8643
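/* A small illustration of the rules above, with hypothetical insns:
   after a potentially-trapping addt $f1,$f2,$f3 opens a shadow, a later
   insn in the same shadow that also writes $f3 violates (c), and one
   that writes $f1 or $f2 clobbers an operand that was live on entry,
   violating (a); either case forces a trapb to be emitted first.  */
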
8644static void
8645alpha_handle_trap_shadows (void)
8646{
8647  struct shadow_summary shadow;
8648  int trap_pending, exception_nesting;
8649  rtx i, n;
8650
8651  trap_pending = 0;
8652  exception_nesting = 0;
8653  shadow.used.i = 0;
8654  shadow.used.fp = 0;
8655  shadow.used.mem = 0;
8656  shadow.defd = shadow.used;
8657
8658  for (i = get_insns (); i ; i = NEXT_INSN (i))
8659    {
8660      if (GET_CODE (i) == NOTE)
8661	{
8662	  switch (NOTE_LINE_NUMBER (i))
8663	    {
8664	    case NOTE_INSN_EH_REGION_BEG:
8665	      exception_nesting++;
8666	      if (trap_pending)
8667		goto close_shadow;
8668	      break;
8669
8670	    case NOTE_INSN_EH_REGION_END:
8671	      exception_nesting--;
8672	      if (trap_pending)
8673		goto close_shadow;
8674	      break;
8675
8676	    case NOTE_INSN_EPILOGUE_BEG:
8677	      if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8678		goto close_shadow;
8679	      break;
8680	    }
8681	}
8682      else if (trap_pending)
8683	{
8684	  if (alpha_tp == ALPHA_TP_FUNC)
8685	    {
8686	      if (GET_CODE (i) == JUMP_INSN
8687		  && GET_CODE (PATTERN (i)) == RETURN)
8688		goto close_shadow;
8689	    }
8690	  else if (alpha_tp == ALPHA_TP_INSN)
8691	    {
8692	      if (optimize > 0)
8693		{
8694		  struct shadow_summary sum;
8695
8696		  sum.used.i = 0;
8697		  sum.used.fp = 0;
8698		  sum.used.mem = 0;
8699		  sum.defd = sum.used;
8700
8701		  switch (GET_CODE (i))
8702		    {
8703		    case INSN:
8704		      /* Annoyingly, get_attr_trap will die on these.  */
8705		      if (GET_CODE (PATTERN (i)) == USE
8706			  || GET_CODE (PATTERN (i)) == CLOBBER)
8707			break;
8708
8709		      summarize_insn (PATTERN (i), &sum, 0);
8710
8711		      if ((sum.defd.i & shadow.defd.i)
8712			  || (sum.defd.fp & shadow.defd.fp))
8713			{
8714			  /* (c) would be violated */
8715			  goto close_shadow;
8716			}
8717
8718		      /* Combine shadow with summary of current insn: */
8719		      shadow.used.i   |= sum.used.i;
8720		      shadow.used.fp  |= sum.used.fp;
8721		      shadow.used.mem |= sum.used.mem;
8722		      shadow.defd.i   |= sum.defd.i;
8723		      shadow.defd.fp  |= sum.defd.fp;
8724		      shadow.defd.mem |= sum.defd.mem;
8725
8726		      if ((sum.defd.i & shadow.used.i)
8727			  || (sum.defd.fp & shadow.used.fp)
8728			  || (sum.defd.mem & shadow.used.mem))
8729			{
8730			  /* (a) would be violated (also takes care of (b))  */
8731			  gcc_assert (get_attr_trap (i) != TRAP_YES
8732				      || (!(sum.defd.i & sum.used.i)
8733					  && !(sum.defd.fp & sum.used.fp)));
8734
8735			  goto close_shadow;
8736			}
8737		      break;
8738
8739		    case JUMP_INSN:
8740		    case CALL_INSN:
8741		    case CODE_LABEL:
8742		      goto close_shadow;
8743
8744		    default:
8745		      gcc_unreachable ();
8746		    }
8747		}
8748	      else
8749		{
8750		close_shadow:
8751		  n = emit_insn_before (gen_trapb (), i);
8752		  PUT_MODE (n, TImode);
8753		  PUT_MODE (i, TImode);
8754		  trap_pending = 0;
8755		  shadow.used.i = 0;
8756		  shadow.used.fp = 0;
8757		  shadow.used.mem = 0;
8758		  shadow.defd = shadow.used;
8759		}
8760	    }
8761	}
8762
8763      if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8764	  && GET_CODE (i) == INSN
8765	  && GET_CODE (PATTERN (i)) != USE
8766	  && GET_CODE (PATTERN (i)) != CLOBBER
8767	  && get_attr_trap (i) == TRAP_YES)
8768	{
8769	  if (optimize && !trap_pending)
8770	    summarize_insn (PATTERN (i), &shadow, 0);
8771	  trap_pending = 1;
8772	}
8773    }
8774}
8775
8776/* Alpha can only issue instruction groups simultaneously if they are
8777   suitably aligned.  This is very processor-specific.  */
8778/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8779   that are marked "fake".  These instructions do not exist on that target,
8780   but it is possible to see these insns with deranged combinations of
8781   command-line options, such as "-mtune=ev4 -mmax".  Instead of aborting,
8782   choose a result at random.  */
8783
8784enum alphaev4_pipe {
8785  EV4_STOP = 0,
8786  EV4_IB0 = 1,
8787  EV4_IB1 = 2,
8788  EV4_IBX = 4
8789};
8790
8791enum alphaev5_pipe {
8792  EV5_STOP = 0,
8793  EV5_NONE = 1,
8794  EV5_E01 = 2,
8795  EV5_E0 = 4,
8796  EV5_E1 = 8,
8797  EV5_FAM = 16,
8798  EV5_FA = 32,
8799  EV5_FM = 64
8800};
8801
8802static enum alphaev4_pipe
8803alphaev4_insn_pipe (rtx insn)
8804{
8805  if (recog_memoized (insn) < 0)
8806    return EV4_STOP;
8807  if (get_attr_length (insn) != 4)
8808    return EV4_STOP;
8809
8810  switch (get_attr_type (insn))
8811    {
8812    case TYPE_ILD:
8813    case TYPE_LDSYM:
8814    case TYPE_FLD:
8815    case TYPE_LD_L:
8816      return EV4_IBX;
8817
8818    case TYPE_IADD:
8819    case TYPE_ILOG:
8820    case TYPE_ICMOV:
8821    case TYPE_ICMP:
8822    case TYPE_FST:
8823    case TYPE_SHIFT:
8824    case TYPE_IMUL:
8825    case TYPE_FBR:
8826    case TYPE_MVI:		/* fake */
8827      return EV4_IB0;
8828
8829    case TYPE_IST:
8830    case TYPE_MISC:
8831    case TYPE_IBR:
8832    case TYPE_JSR:
8833    case TYPE_CALLPAL:
8834    case TYPE_FCPYS:
8835    case TYPE_FCMOV:
8836    case TYPE_FADD:
8837    case TYPE_FDIV:
8838    case TYPE_FMUL:
8839    case TYPE_ST_C:
8840    case TYPE_MB:
8841    case TYPE_FSQRT:		/* fake */
8842    case TYPE_FTOI:		/* fake */
8843    case TYPE_ITOF:		/* fake */
8844      return EV4_IB1;
8845
8846    default:
8847      gcc_unreachable ();
8848    }
8849}
8850
8851static enum alphaev5_pipe
8852alphaev5_insn_pipe (rtx insn)
8853{
8854  if (recog_memoized (insn) < 0)
8855    return EV5_STOP;
8856  if (get_attr_length (insn) != 4)
8857    return EV5_STOP;
8858
8859  switch (get_attr_type (insn))
8860    {
8861    case TYPE_ILD:
8862    case TYPE_FLD:
8863    case TYPE_LDSYM:
8864    case TYPE_IADD:
8865    case TYPE_ILOG:
8866    case TYPE_ICMOV:
8867    case TYPE_ICMP:
8868      return EV5_E01;
8869
8870    case TYPE_IST:
8871    case TYPE_FST:
8872    case TYPE_SHIFT:
8873    case TYPE_IMUL:
8874    case TYPE_MISC:
8875    case TYPE_MVI:
8876    case TYPE_LD_L:
8877    case TYPE_ST_C:
8878    case TYPE_MB:
8879    case TYPE_FTOI:		/* fake */
8880    case TYPE_ITOF:		/* fake */
8881      return EV5_E0;
8882
8883    case TYPE_IBR:
8884    case TYPE_JSR:
8885    case TYPE_CALLPAL:
8886      return EV5_E1;
8887
8888    case TYPE_FCPYS:
8889      return EV5_FAM;
8890
8891    case TYPE_FBR:
8892    case TYPE_FCMOV:
8893    case TYPE_FADD:
8894    case TYPE_FDIV:
8895    case TYPE_FSQRT:		/* fake */
8896      return EV5_FA;
8897
8898    case TYPE_FMUL:
8899      return EV5_FM;
8900
8901    default:
8902      gcc_unreachable ();
8903    }
8904}
8905
8906/* IN_USE is a mask of the slots currently filled within the insn group.
8907   The mask bits come from alphaev4_pipe above.  If EV4_IBX is set, then
8908   the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8909
8910   LEN is, of course, the length of the group in bytes.  */
8911
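/* For instance, with a hypothetical pair of insns: an ILD load classified
   as EV4_IBX followed by an IADD classified as EV4_IB0 can share a group.
   The load provisionally takes IB0 (with IBX noted); when the add also
   wants IB0, the code below moves the flexible load over to IB1 instead
   of ending the group.  */
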
8912static rtx
8913alphaev4_next_group (rtx insn, int *pin_use, int *plen)
8914{
8915  int len, in_use;
8916
8917  len = in_use = 0;
8918
8919  if (! INSN_P (insn)
8920      || GET_CODE (PATTERN (insn)) == CLOBBER
8921      || GET_CODE (PATTERN (insn)) == USE)
8922    goto next_and_done;
8923
8924  while (1)
8925    {
8926      enum alphaev4_pipe pipe;
8927
8928      pipe = alphaev4_insn_pipe (insn);
8929      switch (pipe)
8930	{
8931	case EV4_STOP:
8932	  /* Force complex instructions to start new groups.  */
8933	  if (in_use)
8934	    goto done;
8935
8936	  /* If this is a completely unrecognized insn, it's an asm.
8937	     We don't know how long it is, so record length as -1 to
8938	     signal a needed realignment.  */
8939	  if (recog_memoized (insn) < 0)
8940	    len = -1;
8941	  else
8942	    len = get_attr_length (insn);
8943	  goto next_and_done;
8944
8945	case EV4_IBX:
8946	  if (in_use & EV4_IB0)
8947	    {
8948	      if (in_use & EV4_IB1)
8949		goto done;
8950	      in_use |= EV4_IB1;
8951	    }
8952	  else
8953	    in_use |= EV4_IB0 | EV4_IBX;
8954	  break;
8955
8956	case EV4_IB0:
8957	  if (in_use & EV4_IB0)
8958	    {
8959	      if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8960		goto done;
8961	      in_use |= EV4_IB1;
8962	    }
8963	  in_use |= EV4_IB0;
8964	  break;
8965
8966	case EV4_IB1:
8967	  if (in_use & EV4_IB1)
8968	    goto done;
8969	  in_use |= EV4_IB1;
8970	  break;
8971
8972	default:
8973	  gcc_unreachable ();
8974	}
8975      len += 4;
8976
8977      /* Haifa doesn't do well scheduling branches.  */
8978      if (GET_CODE (insn) == JUMP_INSN)
8979	goto next_and_done;
8980
8981    next:
8982      insn = next_nonnote_insn (insn);
8983
8984      if (!insn || ! INSN_P (insn))
8985	goto done;
8986
8987      /* Let Haifa tell us where it thinks insn group boundaries are.  */
8988      if (GET_MODE (insn) == TImode)
8989	goto done;
8990
8991      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
8992	goto next;
8993    }
8994
8995 next_and_done:
8996  insn = next_nonnote_insn (insn);
8997
8998 done:
8999  *plen = len;
9000  *pin_use = in_use;
9001  return insn;
9002}
9003
9004/* IN_USE is a mask of the slots currently filled within the insn group.
9005   The mask bits come from alphaev5_pipe above.  If EV5_E01 is set, then
9006   the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9007
9008   LEN is, of course, the length of the group in bytes.  */
9009
9010static rtx
9011alphaev5_next_group (rtx insn, int *pin_use, int *plen)
9012{
9013  int len, in_use;
9014
9015  len = in_use = 0;
9016
9017  if (! INSN_P (insn)
9018      || GET_CODE (PATTERN (insn)) == CLOBBER
9019      || GET_CODE (PATTERN (insn)) == USE)
9020    goto next_and_done;
9021
9022  while (1)
9023    {
9024      enum alphaev5_pipe pipe;
9025
9026      pipe = alphaev5_insn_pipe (insn);
9027      switch (pipe)
9028	{
9029	case EV5_STOP:
9030	  /* Force complex instructions to start new groups.  */
9031	  if (in_use)
9032	    goto done;
9033
9034	  /* If this is a completely unrecognized insn, it's an asm.
9035	     We don't know how long it is, so record length as -1 to
9036	     signal a needed realignment.  */
9037	  if (recog_memoized (insn) < 0)
9038	    len = -1;
9039	  else
9040	    len = get_attr_length (insn);
9041	  goto next_and_done;
9042
9043	/* ??? Most of the places below, we would like to assert never
9044	   happen, as it would indicate an error either in Haifa, or
9045	   in the scheduling description.  Unfortunately, Haifa never
9046	   schedules the last instruction of the BB, so we don't have
9047	   an accurate TI bit to go off.  */
9048	case EV5_E01:
9049	  if (in_use & EV5_E0)
9050	    {
9051	      if (in_use & EV5_E1)
9052		goto done;
9053	      in_use |= EV5_E1;
9054	    }
9055	  else
9056	    in_use |= EV5_E0 | EV5_E01;
9057	  break;
9058
9059	case EV5_E0:
9060	  if (in_use & EV5_E0)
9061	    {
9062	      if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9063		goto done;
9064	      in_use |= EV5_E1;
9065	    }
9066	  in_use |= EV5_E0;
9067	  break;
9068
9069	case EV5_E1:
9070	  if (in_use & EV5_E1)
9071	    goto done;
9072	  in_use |= EV5_E1;
9073	  break;
9074
9075	case EV5_FAM:
9076	  if (in_use & EV5_FA)
9077	    {
9078	      if (in_use & EV5_FM)
9079		goto done;
9080	      in_use |= EV5_FM;
9081	    }
9082	  else
9083	    in_use |= EV5_FA | EV5_FAM;
9084	  break;
9085
9086	case EV5_FA:
9087	  if (in_use & EV5_FA)
9088	    goto done;
9089	  in_use |= EV5_FA;
9090	  break;
9091
9092	case EV5_FM:
9093	  if (in_use & EV5_FM)
9094	    goto done;
9095	  in_use |= EV5_FM;
9096	  break;
9097
9098	case EV5_NONE:
9099	  break;
9100
9101	default:
9102	  gcc_unreachable ();
9103	}
9104      len += 4;
9105
9106      /* Haifa doesn't do well scheduling branches.  */
9107      /* ??? If this is predicted not-taken, slotting continues, except
9108	 that no more IBR, FBR, or JSR insns may be slotted.  */
9109      if (GET_CODE (insn) == JUMP_INSN)
9110	goto next_and_done;
9111
9112    next:
9113      insn = next_nonnote_insn (insn);
9114
9115      if (!insn || ! INSN_P (insn))
9116	goto done;
9117
9118      /* Let Haifa tell us where it thinks insn group boundaries are.  */
9119      if (GET_MODE (insn) == TImode)
9120	goto done;
9121
9122      if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9123	goto next;
9124    }
9125
9126 next_and_done:
9127  insn = next_nonnote_insn (insn);
9128
9129 done:
9130  *plen = len;
9131  *pin_use = in_use;
9132  return insn;
9133}
9134
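/* Return a nop to fill the next free slot of an EV4 issue group described
   by *PIN_USE, marking that slot as used: an integer nop if the first slot
   is free, or if the second slot is free and the insn already issued could
   have used either slot; an fnop if FP insns are enabled and the second
   slot is free; and the unop pattern otherwise.  */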
9135static rtx
9136alphaev4_next_nop (int *pin_use)
9137{
9138  int in_use = *pin_use;
9139  rtx nop;
9140
9141  if (!(in_use & EV4_IB0))
9142    {
9143      in_use |= EV4_IB0;
9144      nop = gen_nop ();
9145    }
9146  else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9147    {
9148      in_use |= EV4_IB1;
9149      nop = gen_nop ();
9150    }
9151  else if (TARGET_FP && !(in_use & EV4_IB1))
9152    {
9153      in_use |= EV4_IB1;
9154      nop = gen_fnop ();
9155    }
9156  else
9157    nop = gen_unop ();
9158
9159  *pin_use = in_use;
9160  return nop;
9161}
9162
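/* Likewise for an EV5 issue group: prefer an integer nop in E1, then an
   fnop in FA or FM when FP insns are enabled, falling back to the unop
   pattern.  */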
9163static rtx
9164alphaev5_next_nop (int *pin_use)
9165{
9166  int in_use = *pin_use;
9167  rtx nop;
9168
9169  if (!(in_use & EV5_E1))
9170    {
9171      in_use |= EV5_E1;
9172      nop = gen_nop ();
9173    }
9174  else if (TARGET_FP && !(in_use & EV5_FA))
9175    {
9176      in_use |= EV5_FA;
9177      nop = gen_fnop ();
9178    }
9179  else if (TARGET_FP && !(in_use & EV5_FM))
9180    {
9181      in_use |= EV5_FM;
9182      nop = gen_fnop ();
9183    }
9184  else
9185    nop = gen_unop ();
9186
9187  *pin_use = in_use;
9188  return nop;
9189}
9190
/* The instruction group alignment main loop.  MAX_ALIGN is the largest
   group alignment, in bytes, that we care about; NEXT_GROUP and NEXT_NOP
   are the CPU-specific group scanner and nop generator defined above.  */
9192
9193static void
9194alpha_align_insns (unsigned int max_align,
9195		   rtx (*next_group) (rtx, int *, int *),
9196		   rtx (*next_nop) (int *))
9197{
9198  /* ALIGN is the known alignment for the insn group.  */
9199  unsigned int align;
9200  /* OFS is the offset of the current insn in the insn group.  */
9201  int ofs;
9202  int prev_in_use, in_use, len, ldgp;
9203  rtx i, next;
9204
9205  /* Let shorten branches care for assigning alignments to code labels.  */
9206  shorten_branches (get_insns ());
9207
9208  if (align_functions < 4)
9209    align = 4;
9210  else if ((unsigned int) align_functions < max_align)
9211    align = align_functions;
9212  else
9213    align = max_align;
9214
9215  ofs = prev_in_use = 0;
9216  i = get_insns ();
9217  if (GET_CODE (i) == NOTE)
9218    i = next_nonnote_insn (i);
9219
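  /* The initial ldgp sequence at the start of the function is two 4-byte
     instructions; track how much of it is still ahead of us so that no
     padding is ever inserted inside it.  */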
9220  ldgp = alpha_function_needs_gp ? 8 : 0;
9221
9222  while (i)
9223    {
9224      next = (*next_group) (i, &in_use, &len);
9225
9226      /* When we see a label, resync alignment etc.  */
9227      if (GET_CODE (i) == CODE_LABEL)
9228	{
9229	  unsigned int new_align = 1 << label_to_alignment (i);
9230
9231	  if (new_align >= align)
9232	    {
9233	      align = new_align < max_align ? new_align : max_align;
9234	      ofs = 0;
9235	    }
9236
9237	  else if (ofs & (new_align-1))
9238	    ofs = (ofs | (new_align-1)) + 1;
9239	  gcc_assert (!len);
9240	}
9241
      /* Handle complex instructions specially.  */
9243      else if (in_use == 0)
9244	{
9245	  /* Asms will have length < 0.  This is a signal that we have
9246	     lost alignment knowledge.  Assume, however, that the asm
9247	     will not mis-align instructions.  */
9248	  if (len < 0)
9249	    {
9250	      ofs = 0;
9251	      align = 4;
9252	      len = 0;
9253	    }
9254	}
9255
9256      /* If the known alignment is smaller than the recognized insn group,
9257	 realign the output.  */
9258      else if ((int) align < len)
9259	{
9260	  unsigned int new_log_align = len > 8 ? 4 : 3;
9261	  rtx prev, where;
9262
9263	  where = prev = prev_nonnote_insn (i);
9264	  if (!where || GET_CODE (where) != CODE_LABEL)
9265	    where = i;
9266
9267	  /* Can't realign between a call and its gp reload.  */
9268	  if (! (TARGET_EXPLICIT_RELOCS
9269		 && prev && GET_CODE (prev) == CALL_INSN))
9270	    {
9271	      emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9272	      align = 1 << new_log_align;
9273	      ofs = 0;
9274	    }
9275	}
9276
9277      /* We may not insert padding inside the initial ldgp sequence.  */
9278      else if (ldgp > 0)
9279	ldgp -= len;
9280
9281      /* If the group won't fit in the same INT16 as the previous,
9282	 we need to add padding to keep the group together.  Rather
9283	 than simply leaving the insn filling to the assembler, we
9284	 can make use of the knowledge of what sorts of instructions
9285	 were issued in the previous group to make sure that all of
9286	 the added nops are really free.  */
9287      else if (ofs + len > (int) align)
9288	{
9289	  int nop_count = (align - ofs) / 4;
9290	  rtx where;
9291
9292	  /* Insert nops before labels, branches, and calls to truly merge
9293	     the execution of the nops with the previous instruction group.  */
9294	  where = prev_nonnote_insn (i);
9295	  if (where)
9296	    {
9297	      if (GET_CODE (where) == CODE_LABEL)
9298		{
9299		  rtx where2 = prev_nonnote_insn (where);
9300		  if (where2 && GET_CODE (where2) == JUMP_INSN)
9301		    where = where2;
9302		}
9303	      else if (GET_CODE (where) == INSN)
9304		where = i;
9305	    }
9306	  else
9307	    where = i;
9308
9309	  do
9310	    emit_insn_before ((*next_nop)(&prev_in_use), where);
9311	  while (--nop_count);
9312	  ofs = 0;
9313	}
9314
9315      ofs = (ofs + len) & (align - 1);
9316      prev_in_use = in_use;
9317      i = next;
9318    }
9319}
9320
9321/* Machine dependent reorg pass.  */
9322
9323static void
9324alpha_reorg (void)
9325{
9326  if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
9327    alpha_handle_trap_shadows ();
9328
9329  /* Due to the number of extra trapb insns, don't bother fixing up
9330     alignment when trap precision is instruction.  Moreover, we can
9331     only do our job when sched2 is run.  */
9332  if (optimize && !optimize_size
9333      && alpha_tp != ALPHA_TP_INSN
9334      && flag_schedule_insns_after_reload)
9335    {
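      /* The maximum useful group alignment follows the issue width:
         two instructions (8 bytes) per cycle on EV4, four instructions
         (16 bytes) on EV5.  */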
9336      if (alpha_tune == PROCESSOR_EV4)
9337	alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop);
9338      else if (alpha_tune == PROCESSOR_EV5)
9339	alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop);
9340    }
9341}
9342
9343#if !TARGET_ABI_UNICOSMK
9344
9345#ifdef HAVE_STAMP_H
9346#include <stamp.h>
9347#endif
9348
9349static void
9350alpha_file_start (void)
9351{
9352#ifdef OBJECT_FORMAT_ELF
9353  /* If emitting dwarf2 debug information, we cannot generate a .file
9354     directive to start the file, as it will conflict with dwarf2out
9355     file numbers.  So it's only useful when emitting mdebug output.  */
9356  targetm.file_start_file_directive = (write_symbols == DBX_DEBUG);
9357#endif
9358
9359  default_file_start ();
9360#ifdef MS_STAMP
9361  fprintf (asm_out_file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
9362#endif
9363
9364  fputs ("\t.set noreorder\n", asm_out_file);
9365  fputs ("\t.set volatile\n", asm_out_file);
9366  if (!TARGET_ABI_OPEN_VMS)
9367    fputs ("\t.set noat\n", asm_out_file);
9368  if (TARGET_EXPLICIT_RELOCS)
9369    fputs ("\t.set nomacro\n", asm_out_file);
9370  if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9371    {
9372      const char *arch;
9373
9374      if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9375	arch = "ev6";
9376      else if (TARGET_MAX)
9377	arch = "pca56";
9378      else if (TARGET_BWX)
9379	arch = "ev56";
9380      else if (alpha_cpu == PROCESSOR_EV5)
9381	arch = "ev5";
9382      else
9383	arch = "ev4";
9384
9385      fprintf (asm_out_file, "\t.arch %s\n", arch);
9386    }
9387}
9388#endif
9389
9390#ifdef OBJECT_FORMAT_ELF
9391/* Since we've no .dynbss section, pretend flag_pic is always set, so that
9392   we don't wind up with dynamic relocations in .rodata.  */
9393
9394static void
9395alpha_elf_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
9396{
9397  default_elf_select_section_1 (exp, reloc, align, true);
9398}
9399
9400static void
9401alpha_elf_unique_section (tree decl, int reloc)
9402{
9403  default_unique_section_1 (decl, reloc, true);
9404}
9405
9406/* Switch to the section to which we should output X.  The only thing
9407   special we do here is to honor small data.  */
9408
9409static void
9410alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
9411			      unsigned HOST_WIDE_INT align)
9412{
9413  if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9414    /* ??? Consider using mergeable sdata sections.  */
9415    sdata_section ();
9416  else
9417    {
9418      int save_pic = flag_pic;
9419      flag_pic = 1;
9420      default_elf_select_rtx_section (mode, x, align);
9421      flag_pic = save_pic;
9422    }
9423}
9424
9425static unsigned int
9426alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9427{
9428  unsigned int flags = 0;
9429
9430  if (strcmp (name, ".sdata") == 0
9431      || strncmp (name, ".sdata.", 7) == 0
9432      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9433      || strcmp (name, ".sbss") == 0
9434      || strncmp (name, ".sbss.", 6) == 0
9435      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9436    flags = SECTION_SMALL;
9437
9438  flags |= default_section_type_flags_1 (decl, name, reloc, true);
9439  return flags;
9440}
9441#endif /* OBJECT_FORMAT_ELF */
9442
9443/* Structure to collect function names for final output in link section.  */
9444/* Note that items marked with GTY can't be ifdef'ed out.  */
9445
9446enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN};
9447enum reloc_kind {KIND_LINKAGE, KIND_CODEADDR};
9448
9449struct alpha_links GTY(())
9450{
9451  int num;
9452  rtx linkage;
9453  enum links_kind lkind;
9454  enum reloc_kind rkind;
9455};
9456
9457struct alpha_funcs GTY(())
9458{
9459  int num;
9460  splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
9461    links;
9462};
9463
9464static GTY ((param1_is (char *), param2_is (struct alpha_links *)))
9465  splay_tree alpha_links_tree;
9466static GTY ((param1_is (tree), param2_is (struct alpha_funcs *)))
9467  splay_tree alpha_funcs_tree;
9468
9469static GTY(()) int alpha_funcs_num;
9470
9471#if TARGET_ABI_OPEN_VMS
9472
9473/* Return the VMS argument type corresponding to MODE.  */
9474
9475enum avms_arg_type
9476alpha_arg_type (enum machine_mode mode)
9477{
9478  switch (mode)
9479    {
9480    case SFmode:
9481      return TARGET_FLOAT_VAX ? FF : FS;
9482    case DFmode:
9483      return TARGET_FLOAT_VAX ? FD : FT;
9484    default:
9485      return I64;
9486    }
9487}
9488
9489/* Return an rtx for an integer representing the VMS Argument Information
9490   register value.  */
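/* Bits 0..7 hold the argument count; above that, each of the first six
   arguments gets a 3-bit type code from alpha_arg_type, with argument I
   occupying bits I*3 + 8 through I*3 + 10.  */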
9491
9492rtx
9493alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9494{
9495  unsigned HOST_WIDE_INT regval = cum.num_args;
9496  int i;
9497
9498  for (i = 0; i < 6; i++)
9499    regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9500
9501  return GEN_INT (regval);
9502}
9503
/* Make (or fake) a .linkage entry for a function call.

   IS_LOCAL is 0 if NAME is used in a call, 1 if NAME is used in a definition.

   Return a SYMBOL_REF rtx for the linkage.  */
9509
9510rtx
9511alpha_need_linkage (const char *name, int is_local)
9512{
9513  splay_tree_node node;
9514  struct alpha_links *al;
9515
9516  if (name[0] == '*')
9517    name++;
9518
9519  if (is_local)
9520    {
9521      struct alpha_funcs *cfaf;
9522
9523      if (!alpha_funcs_tree)
9524        alpha_funcs_tree = splay_tree_new_ggc ((splay_tree_compare_fn)
9525					       splay_tree_compare_pointers);
9526
9527      cfaf = (struct alpha_funcs *) ggc_alloc (sizeof (struct alpha_funcs));
9528
9529      cfaf->links = 0;
9530      cfaf->num = ++alpha_funcs_num;
9531
9532      splay_tree_insert (alpha_funcs_tree,
9533			 (splay_tree_key) current_function_decl,
9534			 (splay_tree_value) cfaf);
9535    }
9536
9537  if (alpha_links_tree)
9538    {
9539      /* Is this name already defined?  */
9540
9541      node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name);
9542      if (node)
9543	{
9544	  al = (struct alpha_links *) node->value;
9545	  if (is_local)
9546	    {
9547	      /* Defined here but external assumed.  */
9548	      if (al->lkind == KIND_EXTERN)
9549		al->lkind = KIND_LOCAL;
9550	    }
9551	  else
9552	    {
9553	      /* Used here but unused assumed.  */
9554	      if (al->lkind == KIND_UNUSED)
9555		al->lkind = KIND_LOCAL;
9556	    }
9557	  return al->linkage;
9558	}
9559    }
9560  else
9561    alpha_links_tree = splay_tree_new_ggc ((splay_tree_compare_fn) strcmp);
9562
9563  al = (struct alpha_links *) ggc_alloc (sizeof (struct alpha_links));
9564  name = ggc_strdup (name);
9565
9566  /* Assume external if no definition.  */
9567  al->lkind = (is_local ? KIND_UNUSED : KIND_EXTERN);
9568
9569  /* Ensure we have an IDENTIFIER so assemble_name can mark it used.  */
9570  get_identifier (name);
9571
9572  /* Construct a SYMBOL_REF for us to call.  */
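  /* The linkage symbol is the name bracketed by '$' and "..lk", e.g.
     "$foo..lk" for a (hypothetical) function foo.  */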
9573  {
9574    size_t name_len = strlen (name);
9575    char *linksym = alloca (name_len + 6);
9576    linksym[0] = '$';
9577    memcpy (linksym + 1, name, name_len);
9578    memcpy (linksym + 1 + name_len, "..lk", 5);
9579    al->linkage = gen_rtx_SYMBOL_REF (Pmode,
9580				      ggc_alloc_string (linksym, name_len + 5));
9581  }
9582
9583  splay_tree_insert (alpha_links_tree, (splay_tree_key) name,
9584		     (splay_tree_value) al);
9585
9586  return al->linkage;
9587}
9588
9589rtx
9590alpha_use_linkage (rtx linkage, tree cfundecl, int lflag, int rflag)
9591{
9592  splay_tree_node cfunnode;
9593  struct alpha_funcs *cfaf;
9594  struct alpha_links *al;
9595  const char *name = XSTR (linkage, 0);
9596
9597  cfaf = (struct alpha_funcs *) 0;
9598  al = (struct alpha_links *) 0;
9599
9600  cfunnode = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) cfundecl);
9601  cfaf = (struct alpha_funcs *) cfunnode->value;
9602
9603  if (cfaf->links)
9604    {
9605      splay_tree_node lnode;
9606
9607      /* Is this name already defined?  */
9608
9609      lnode = splay_tree_lookup (cfaf->links, (splay_tree_key) name);
9610      if (lnode)
9611	al = (struct alpha_links *) lnode->value;
9612    }
9613  else
9614    cfaf->links = splay_tree_new_ggc ((splay_tree_compare_fn) strcmp);
9615
9616  if (!al)
9617    {
9618      size_t name_len;
9619      size_t buflen;
9620      char buf [512];
9621      char *linksym;
9622      splay_tree_node node = 0;
9623      struct alpha_links *anl;
9624
9625      if (name[0] == '*')
9626	name++;
9627
9628      name_len = strlen (name);
9629
9630      al = (struct alpha_links *) ggc_alloc (sizeof (struct alpha_links));
9631      al->num = cfaf->num;
9632
9633      node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name);
9634      if (node)
9635	{
9636	  anl = (struct alpha_links *) node->value;
9637	  al->lkind = anl->lkind;
9638	}
9639
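      /* Here the linkage symbol also carries the referencing function's
         number, e.g. "$2..foo..lk" when the (hypothetical) symbol foo is
         referenced from the second function seen.  */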
9640      sprintf (buf, "$%d..%s..lk", cfaf->num, name);
9641      buflen = strlen (buf);
9642      linksym = alloca (buflen + 1);
9643      memcpy (linksym, buf, buflen + 1);
9644
9645      al->linkage = gen_rtx_SYMBOL_REF
9646	(Pmode, ggc_alloc_string (linksym, buflen + 1));
9647
9648      splay_tree_insert (cfaf->links, (splay_tree_key) name,
9649			 (splay_tree_value) al);
9650    }
9651
9652  if (rflag)
9653    al->rkind = KIND_CODEADDR;
9654  else
9655    al->rkind = KIND_LINKAGE;
9656
9657  if (lflag)
9658    return gen_rtx_MEM (Pmode, plus_constant (al->linkage, 8));
9659  else
9660    return al->linkage;
9661}
9662
9663static int
9664alpha_write_one_linkage (splay_tree_node node, void *data)
9665{
9666  const char *const name = (const char *) node->key;
9667  struct alpha_links *link = (struct alpha_links *) node->value;
9668  FILE *stream = (FILE *) data;
9669
9670  fprintf (stream, "$%d..%s..lk:\n", link->num, name);
9671  if (link->rkind == KIND_CODEADDR)
9672    {
9673      if (link->lkind == KIND_LOCAL)
9674	{
9675	  /* Local and used */
9676	  fprintf (stream, "\t.quad %s..en\n", name);
9677	}
9678      else
9679	{
9680	  /* External and used, request code address.  */
9681	  fprintf (stream, "\t.code_address %s\n", name);
9682	}
9683    }
9684  else
9685    {
9686      if (link->lkind == KIND_LOCAL)
9687	{
9688	  /* Local and used, build linkage pair.  */
9689	  fprintf (stream, "\t.quad %s..en\n", name);
9690	  fprintf (stream, "\t.quad %s\n", name);
9691	}
9692      else
9693	{
9694	  /* External and used, request linkage pair.  */
9695	  fprintf (stream, "\t.linkage %s\n", name);
9696	}
9697    }
9698
9699  return 0;
9700}
9701
9702static void
9703alpha_write_linkage (FILE *stream, const char *funname, tree fundecl)
9704{
9705  splay_tree_node node;
9706  struct alpha_funcs *func;
9707
9708  link_section ();
9709  fprintf (stream, "\t.align 3\n");
9710  node = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) fundecl);
9711  func = (struct alpha_funcs *) node->value;
9712
9713  fputs ("\t.name ", stream);
9714  assemble_name (stream, funname);
9715  fputs ("..na\n", stream);
9716  ASM_OUTPUT_LABEL (stream, funname);
9717  fprintf (stream, "\t.pdesc ");
9718  assemble_name (stream, funname);
9719  fprintf (stream, "..en,%s\n",
9720	   alpha_procedure_type == PT_STACK ? "stack"
9721	   : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9722
9723  if (func->links)
9724    {
9725      splay_tree_foreach (func->links, alpha_write_one_linkage, stream);
9726      /* splay_tree_delete (func->links); */
9727    }
9728}
9729
9730/* Given a decl, a section name, and whether the decl initializer
9731   has relocs, choose attributes for the section.  */
9732
9733#define SECTION_VMS_OVERLAY	SECTION_FORGET
9734#define SECTION_VMS_GLOBAL SECTION_MACH_DEP
9735#define SECTION_VMS_INITIALIZE (SECTION_VMS_GLOBAL << 1)
9736
9737static unsigned int
9738vms_section_type_flags (tree decl, const char *name, int reloc)
9739{
9740  unsigned int flags = default_section_type_flags (decl, name, reloc);
9741
9742  if (decl && DECL_ATTRIBUTES (decl)
9743      && lookup_attribute ("overlaid", DECL_ATTRIBUTES (decl)))
9744    flags |= SECTION_VMS_OVERLAY;
9745  if (decl && DECL_ATTRIBUTES (decl)
9746      && lookup_attribute ("global", DECL_ATTRIBUTES (decl)))
9747    flags |= SECTION_VMS_GLOBAL;
9748  if (decl && DECL_ATTRIBUTES (decl)
9749      && lookup_attribute ("initialize", DECL_ATTRIBUTES (decl)))
9750    flags |= SECTION_VMS_INITIALIZE;
9751
9752  return flags;
9753}
9754
9755/* Switch to an arbitrary section NAME with attributes as specified
9756   by FLAGS.  ALIGN specifies any known alignment requirements for
9757   the section; 0 if the default should be used.  */
9758
9759static void
9760vms_asm_named_section (const char *name, unsigned int flags,
9761		       tree decl ATTRIBUTE_UNUSED)
9762{
9763  fputc ('\n', asm_out_file);
9764  fprintf (asm_out_file, ".section\t%s", name);
9765
9766  if (flags & SECTION_VMS_OVERLAY)
9767    fprintf (asm_out_file, ",OVR");
9768  if (flags & SECTION_VMS_GLOBAL)
9769    fprintf (asm_out_file, ",GBL");
9770  if (flags & SECTION_VMS_INITIALIZE)
9771    fprintf (asm_out_file, ",NOMOD");
9772  if (flags & SECTION_DEBUG)
9773    fprintf (asm_out_file, ",NOWRT");
9774
9775  fputc ('\n', asm_out_file);
9776}
9777
9778/* Record an element in the table of global constructors.  SYMBOL is
9779   a SYMBOL_REF of the function to be called; PRIORITY is a number
9780   between 0 and MAX_INIT_PRIORITY.
9781
9782   Differs from default_ctors_section_asm_out_constructor in that the
9783   width of the .ctors entry is always 64 bits, rather than the 32 bits
9784   used by a normal pointer.  */
9785
9786static void
9787vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9788{
9789  ctors_section ();
9790  assemble_align (BITS_PER_WORD);
9791  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9792}
9793
9794static void
9795vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9796{
9797  dtors_section ();
9798  assemble_align (BITS_PER_WORD);
9799  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9800}
9801#else
9802
9803rtx
9804alpha_need_linkage (const char *name ATTRIBUTE_UNUSED,
9805		    int is_local ATTRIBUTE_UNUSED)
9806{
9807  return NULL_RTX;
9808}
9809
9810rtx
9811alpha_use_linkage (rtx linkage ATTRIBUTE_UNUSED,
9812		   tree cfundecl ATTRIBUTE_UNUSED,
9813		   int lflag ATTRIBUTE_UNUSED,
9814		   int rflag ATTRIBUTE_UNUSED)
9815{
9816  return NULL_RTX;
9817}
9818
9819#endif /* TARGET_ABI_OPEN_VMS */
9820
9821#if TARGET_ABI_UNICOSMK
9822
9823/* This evaluates to true if we do not know how to pass TYPE solely in
9824   registers.  This is the case for all arguments that do not fit in two
9825   registers.  */
9826
9827static bool
9828unicosmk_must_pass_in_stack (enum machine_mode mode, tree type)
9829{
9830  if (type == NULL)
9831    return false;
9832
9833  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9834    return true;
9835  if (TREE_ADDRESSABLE (type))
9836    return true;
9837
9838  return ALPHA_ARG_SIZE (mode, type, 0) > 2;
9839}
9840
9841/* Define the offset between two registers, one to be eliminated, and the
9842   other its replacement, at the start of a routine.  */
9843
9844int
9845unicosmk_initial_elimination_offset (int from, int to)
9846{
9847  int fixed_size;
9848
9849  fixed_size = alpha_sa_size();
9850  if (fixed_size != 0)
9851    fixed_size += 48;
9852
9853  if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9854    return -fixed_size;
9855  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9856    return 0;
9857  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9858    return (ALPHA_ROUND (current_function_outgoing_args_size)
9859	    + ALPHA_ROUND (get_frame_size()));
9860  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9861    return (ALPHA_ROUND (fixed_size)
9862	    + ALPHA_ROUND (get_frame_size()
9863			   + current_function_outgoing_args_size));
9864  else
9865    gcc_unreachable ();
9866}
9867
/* Output the module name for .ident and .end directives.  We have to strip
   directories and make sure that the module name starts with a letter
   or '$'.  */
9871
9872static void
9873unicosmk_output_module_name (FILE *file)
9874{
9875  const char *name = lbasename (main_input_filename);
9876  unsigned len = strlen (name);
9877  char *clean_name = alloca (len + 2);
9878  char *ptr = clean_name;
9879
9880  /* CAM only accepts module names that start with a letter or '$'. We
9881     prefix the module name with a '$' if necessary.  */
9882
9883  if (!ISALPHA (*name))
9884    *ptr++ = '$';
9885  memcpy (ptr, name, len + 1);
9886  clean_symbol_name (clean_name);
9887  fputs (clean_name, file);
9888}
9889
9890/* Output the definition of a common variable.  */
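/* E.g. a 16-byte common named foo with 64-bit alignment closes the previous
   psect and then emits ".psect foo,3,common" followed by ".byte 0:16";
   the name and sizes here are purely illustrative.  */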
9891
9892void
9893unicosmk_output_common (FILE *file, const char *name, int size, int align)
9894{
9895  tree name_tree;
9896  printf ("T3E__: common %s\n", name);
9897
9898  common_section ();
9899  fputs("\t.endp\n\n\t.psect ", file);
9900  assemble_name(file, name);
9901  fprintf(file, ",%d,common\n", floor_log2 (align / BITS_PER_UNIT));
9902  fprintf(file, "\t.byte\t0:%d\n", size);
9903
9904  /* Mark the symbol as defined in this module.  */
9905  name_tree = get_identifier (name);
9906  TREE_ASM_WRITTEN (name_tree) = 1;
9907}
9908
9909#define SECTION_PUBLIC SECTION_MACH_DEP
9910#define SECTION_MAIN (SECTION_PUBLIC << 1)
9911static int current_section_align;
9912
9913static unsigned int
9914unicosmk_section_type_flags (tree decl, const char *name,
9915			     int reloc ATTRIBUTE_UNUSED)
9916{
9917  unsigned int flags = default_section_type_flags (decl, name, reloc);
9918
9919  if (!decl)
9920    return flags;
9921
9922  if (TREE_CODE (decl) == FUNCTION_DECL)
9923    {
9924      current_section_align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9925      if (align_functions_log > current_section_align)
9926	current_section_align = align_functions_log;
9927
9928      if (! strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), "main"))
9929	flags |= SECTION_MAIN;
9930    }
9931  else
9932    current_section_align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT);
9933
9934  if (TREE_PUBLIC (decl))
9935    flags |= SECTION_PUBLIC;
9936
9937  return flags;
9938}
9939
9940/* Generate a section name for decl and associate it with the
9941   declaration.  */
9942
9943static void
9944unicosmk_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
9945{
9946  const char *name;
9947  int len;
9948
9949  gcc_assert (decl);
9950
9951  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9952  name = default_strip_name_encoding (name);
9953  len = strlen (name);
9954
9955  if (TREE_CODE (decl) == FUNCTION_DECL)
9956    {
9957      char *string;
9958
9959      /* It is essential that we prefix the section name here because
9960	 otherwise the section names generated for constructors and
9961	 destructors confuse collect2.  */
9962
9963      string = alloca (len + 6);
9964      sprintf (string, "code@%s", name);
9965      DECL_SECTION_NAME (decl) = build_string (len + 5, string);
9966    }
9967  else if (TREE_PUBLIC (decl))
9968    DECL_SECTION_NAME (decl) = build_string (len, name);
9969  else
9970    {
9971      char *string;
9972
9973      string = alloca (len + 6);
9974      sprintf (string, "data@%s", name);
9975      DECL_SECTION_NAME (decl) = build_string (len + 5, string);
9976    }
9977}
9978
9979/* Switch to an arbitrary section NAME with attributes as specified
9980   by FLAGS.  ALIGN specifies any known alignment requirements for
9981   the section; 0 if the default should be used.  */
9982
9983static void
9984unicosmk_asm_named_section (const char *name, unsigned int flags,
9985			    tree decl ATTRIBUTE_UNUSED)
9986{
9987  const char *kind;
9988
9989  /* Close the previous section.  */
9990
9991  fputs ("\t.endp\n\n", asm_out_file);
9992
9993  /* Find out what kind of section we are opening.  */
9994
9995  if (flags & SECTION_MAIN)
9996    fputs ("\t.start\tmain\n", asm_out_file);
9997
9998  if (flags & SECTION_CODE)
9999    kind = "code";
10000  else if (flags & SECTION_PUBLIC)
10001    kind = "common";
10002  else
10003    kind = "data";
10004
10005  if (current_section_align != 0)
10006    fprintf (asm_out_file, "\t.psect\t%s,%d,%s\n", name,
10007	     current_section_align, kind);
10008  else
10009    fprintf (asm_out_file, "\t.psect\t%s,%s\n", name, kind);
10010}
10011
10012static void
10013unicosmk_insert_attributes (tree decl, tree *attr_ptr ATTRIBUTE_UNUSED)
10014{
10015  if (DECL_P (decl)
10016      && (TREE_PUBLIC (decl) || TREE_CODE (decl) == FUNCTION_DECL))
10017    unicosmk_unique_section (decl, 0);
10018}
10019
/* Output an alignment directive.  We have to use the macro 'gcc@code@align'
   in code sections because .align fills unused space with zeroes.  */
10022
10023void
10024unicosmk_output_align (FILE *file, int align)
10025{
10026  if (inside_function)
10027    fprintf (file, "\tgcc@code@align\t%d\n", align);
10028  else
10029    fprintf (file, "\t.align\t%d\n", align);
10030}
10031
10032/* Add a case vector to the current function's list of deferred case
10033   vectors. Case vectors have to be put into a separate section because CAM
10034   does not allow data definitions in code sections.  */
10035
10036void
10037unicosmk_defer_case_vector (rtx lab, rtx vec)
10038{
10039  struct machine_function *machine = cfun->machine;
10040
10041  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10042  machine->addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec,
10043					  machine->addr_list);
10044}
10045
10046/* Output a case vector.  */
10047
10048static void
10049unicosmk_output_addr_vec (FILE *file, rtx vec)
10050{
10051  rtx lab  = XEXP (vec, 0);
10052  rtx body = XEXP (vec, 1);
10053  int vlen = XVECLEN (body, 0);
10054  int idx;
10055
10056  (*targetm.asm_out.internal_label) (file, "L", CODE_LABEL_NUMBER (lab));
10057
10058  for (idx = 0; idx < vlen; idx++)
10059    {
10060      ASM_OUTPUT_ADDR_VEC_ELT
10061        (file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10062    }
10063}
10064
10065/* Output current function's deferred case vectors.  */
10066
10067static void
10068unicosmk_output_deferred_case_vectors (FILE *file)
10069{
10070  struct machine_function *machine = cfun->machine;
10071  rtx t;
10072
10073  if (machine->addr_list == NULL_RTX)
10074    return;
10075
10076  data_section ();
10077  for (t = machine->addr_list; t; t = XEXP (t, 1))
10078    unicosmk_output_addr_vec (file, XEXP (t, 0));
10079}
10080
10081/* Generate the name of the SSIB section for the current function.  */
10082
10083#define SSIB_PREFIX "__SSIB_"
10084#define SSIB_PREFIX_LEN 7
10085
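/* E.g. a function named main gets the SSIB name "__SSIB_main"; longer
   function names are truncated so that the result fits in 255 characters.  */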
10086static const char *
10087unicosmk_ssib_name (void)
10088{
10089  /* This is ok since CAM won't be able to deal with names longer than that
10090     anyway.  */
10091
10092  static char name[256];
10093
10094  rtx x;
10095  const char *fnname;
10096  int len;
10097
10098  x = DECL_RTL (cfun->decl);
10099  gcc_assert (GET_CODE (x) == MEM);
10100  x = XEXP (x, 0);
10101  gcc_assert (GET_CODE (x) == SYMBOL_REF);
10102  fnname = XSTR (x, 0);
10103
10104  len = strlen (fnname);
10105  if (len + SSIB_PREFIX_LEN > 255)
10106    len = 255 - SSIB_PREFIX_LEN;
10107
10108  strcpy (name, SSIB_PREFIX);
10109  strncpy (name + SSIB_PREFIX_LEN, fnname, len);
10110  name[len + SSIB_PREFIX_LEN] = 0;
10111
10112  return name;
10113}
10114
10115/* Set up the dynamic subprogram information block (DSIB) and update the
10116   frame pointer register ($15) for subroutines which have a frame. If the
10117   subroutine doesn't have a frame, simply increment $15.  */
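/* Within the 64-byte DSIB allocated below, we store the return address at
   offset 56 from the new stack pointer, the old frame pointer at 48, the
   SSIB pointer at 32 and the CIW index at 24; the remaining slots are left
   untouched here.  */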
10118
10119static void
10120unicosmk_gen_dsib (unsigned long *imaskP)
10121{
10122  if (alpha_procedure_type == PT_STACK)
10123    {
10124      const char *ssib_name;
10125      rtx mem;
10126
10127      /* Allocate 64 bytes for the DSIB.  */
10128
10129      FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
10130                                  GEN_INT (-64))));
10131      emit_insn (gen_blockage ());
10132
10133      /* Save the return address.  */
10134
10135      mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 56));
10136      set_mem_alias_set (mem, alpha_sr_alias_set);
10137      FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA)));
10138      (*imaskP) &= ~(1UL << REG_RA);
10139
10140      /* Save the old frame pointer.  */
10141
10142      mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 48));
10143      set_mem_alias_set (mem, alpha_sr_alias_set);
10144      FRP (emit_move_insn (mem, hard_frame_pointer_rtx));
10145      (*imaskP) &= ~(1UL << HARD_FRAME_POINTER_REGNUM);
10146
10147      emit_insn (gen_blockage ());
10148
10149      /* Store the SSIB pointer.  */
10150
10151      ssib_name = ggc_strdup (unicosmk_ssib_name ());
10152      mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 32));
10153      set_mem_alias_set (mem, alpha_sr_alias_set);
10154
10155      FRP (emit_move_insn (gen_rtx_REG (DImode, 5),
10156                           gen_rtx_SYMBOL_REF (Pmode, ssib_name)));
10157      FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 5)));
10158
10159      /* Save the CIW index.  */
10160
10161      mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 24));
10162      set_mem_alias_set (mem, alpha_sr_alias_set);
10163      FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 25)));
10164
10165      emit_insn (gen_blockage ());
10166
10167      /* Set the new frame pointer.  */
10168
10169      FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
10170                                  stack_pointer_rtx, GEN_INT (64))));
10171
10172    }
10173  else
10174    {
10175      /* Increment the frame pointer register to indicate that we do not
10176         have a frame.  */
10177
10178      FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
10179                                  hard_frame_pointer_rtx, const1_rtx)));
10180    }
10181}
10182
10183/* Output the static subroutine information block for the current
10184   function.  */
10185
10186static void
10187unicosmk_output_ssib (FILE *file, const char *fnname)
10188{
10189  int len;
10190  int i;
10191  rtx x;
10192  rtx ciw;
10193  struct machine_function *machine = cfun->machine;
10194
10195  ssib_section ();
10196  fprintf (file, "\t.endp\n\n\t.psect\t%s%s,data\n", user_label_prefix,
10197	   unicosmk_ssib_name ());
10198
10199  /* Some required stuff and the function name length.  */
10200
10201  len = strlen (fnname);
10202  fprintf (file, "\t.quad\t^X20008%2.2X28\n", len);
10203
10204  /* Saved registers
10205     ??? We don't do that yet.  */
10206
10207  fputs ("\t.quad\t0\n", file);
10208
10209  /* Function address.  */
10210
10211  fputs ("\t.quad\t", file);
10212  assemble_name (file, fnname);
10213  putc ('\n', file);
10214
10215  fputs ("\t.quad\t0\n", file);
10216  fputs ("\t.quad\t0\n", file);
10217
10218  /* Function name.
10219     ??? We do it the same way Cray CC does it but this could be
10220     simplified.  */
10221
10222  for( i = 0; i < len; i++ )
10223    fprintf (file, "\t.byte\t%d\n", (int)(fnname[i]));
10224  if( (len % 8) == 0 )
10225    fputs ("\t.quad\t0\n", file);
10226  else
10227    fprintf (file, "\t.bits\t%d : 0\n", (8 - (len % 8))*8);
10228
10229  /* All call information words used in the function.  */
10230
10231  for (x = machine->first_ciw; x; x = XEXP (x, 1))
10232    {
10233      ciw = XEXP (x, 0);
10234#if HOST_BITS_PER_WIDE_INT == 32
10235      fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_DOUBLE_HEX "\n",
10236	       CONST_DOUBLE_HIGH (ciw), CONST_DOUBLE_LOW (ciw));
10237#else
10238      fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n", INTVAL (ciw));
10239#endif
10240    }
10241}
10242
10243/* Add a call information word (CIW) to the list of the current function's
10244   CIWs and return its index.
10245
10246   X is a CONST_INT or CONST_DOUBLE representing the CIW.  */
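/* The index returned is the CIW's quadword position within the SSIB as
   laid out by unicosmk_output_ssib: five header quadwords, then the
   function name padded out to a quadword boundary, then the CIWs in the
   order they were added.  */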
10247
10248rtx
10249unicosmk_add_call_info_word (rtx x)
10250{
10251  rtx node;
10252  struct machine_function *machine = cfun->machine;
10253
10254  node = gen_rtx_EXPR_LIST (VOIDmode, x, NULL_RTX);
10255  if (machine->first_ciw == NULL_RTX)
10256    machine->first_ciw = node;
10257  else
10258    XEXP (machine->last_ciw, 1) = node;
10259
10260  machine->last_ciw = node;
10261  ++machine->ciw_count;
10262
10263  return GEN_INT (machine->ciw_count
10264		  + strlen (current_function_name ())/8 + 5);
10265}
10266
10267static char unicosmk_section_buf[100];
10268
10269char *
10270unicosmk_text_section (void)
10271{
10272  static int count = 0;
10273  sprintf (unicosmk_section_buf, "\t.endp\n\n\t.psect\tgcc@text___%d,code",
10274				 count++);
10275  return unicosmk_section_buf;
10276}
10277
10278char *
10279unicosmk_data_section (void)
10280{
10281  static int count = 1;
10282  sprintf (unicosmk_section_buf, "\t.endp\n\n\t.psect\tgcc@data___%d,data",
10283				 count++);
10284  return unicosmk_section_buf;
10285}
10286
/* The Cray assembler doesn't accept extern declarations for symbols which
   are defined in the same file.  We have to keep track of all global
   symbols which are referenced and/or defined in a source file and, at
   the end of the file, output extern declarations for those which are
   referenced but not defined.  */
10292
10293/* List of identifiers for which an extern declaration might have to be
10294   emitted.  */
10295/* FIXME: needs to use GC, so it can be saved and restored for PCH.  */
10296
10297struct unicosmk_extern_list
10298{
10299  struct unicosmk_extern_list *next;
10300  const char *name;
10301};
10302
10303static struct unicosmk_extern_list *unicosmk_extern_head = 0;
10304
10305/* Output extern declarations which are required for every asm file.  */
10306
10307static void
10308unicosmk_output_default_externs (FILE *file)
10309{
10310  static const char *const externs[] =
10311    { "__T3E_MISMATCH" };
10312
10313  int i;
10314  int n;
10315
10316  n = ARRAY_SIZE (externs);
10317
10318  for (i = 0; i < n; i++)
10319    fprintf (file, "\t.extern\t%s\n", externs[i]);
10320}
10321
/* Output extern declarations for global symbols which have been
   referenced but not defined.  */
10324
10325static void
10326unicosmk_output_externs (FILE *file)
10327{
10328  struct unicosmk_extern_list *p;
10329  const char *real_name;
10330  int len;
10331  tree name_tree;
10332
10333  len = strlen (user_label_prefix);
10334  for (p = unicosmk_extern_head; p != 0; p = p->next)
10335    {
10336      /* We have to strip the encoding and possibly remove user_label_prefix
10337	 from the identifier in order to handle -fleading-underscore and
10338	 explicit asm names correctly (cf. gcc.dg/asm-names-1.c).  */
10339      real_name = default_strip_name_encoding (p->name);
10340      if (len && p->name[0] == '*'
10341	  && !memcmp (real_name, user_label_prefix, len))
10342	real_name += len;
10343
10344      name_tree = get_identifier (real_name);
10345      if (! TREE_ASM_WRITTEN (name_tree))
10346	{
10347	  TREE_ASM_WRITTEN (name_tree) = 1;
10348	  fputs ("\t.extern\t", file);
10349	  assemble_name (file, p->name);
10350	  putc ('\n', file);
10351	}
10352    }
10353}
10354
10355/* Record an extern.  */
10356
10357void
10358unicosmk_add_extern (const char *name)
10359{
10360  struct unicosmk_extern_list *p;
10361
10362  p = (struct unicosmk_extern_list *)
10363       xmalloc (sizeof (struct unicosmk_extern_list));
10364  p->next = unicosmk_extern_head;
10365  p->name = name;
10366  unicosmk_extern_head = p;
10367}
10368
10369/* The Cray assembler generates incorrect code if identifiers which
10370   conflict with register names are used as instruction operands. We have
10371   to replace such identifiers with DEX expressions.  */
10372
10373/* Structure to collect identifiers which have been replaced by DEX
10374   expressions.  */
10375/* FIXME: needs to use GC, so it can be saved and restored for PCH.  */
10376
10377struct unicosmk_dex {
10378  struct unicosmk_dex *next;
10379  const char *name;
10380};
10381
10382/* List of identifiers which have been replaced by DEX expressions. The DEX
10383   number is determined by the position in the list.  */
10384
10385static struct unicosmk_dex *unicosmk_dex_list = NULL;
10386
10387/* The number of elements in the DEX list.  */
10388
10389static int unicosmk_dex_count = 0;
10390
10391/* Check if NAME must be replaced by a DEX expression.  */
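/* That is, whether NAME (after skipping any '*' encoding or leading '$')
   looks like one of the register names r0..r31 or f0..f31, in upper or
   lower case.  */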
10392
10393static int
10394unicosmk_special_name (const char *name)
10395{
10396  if (name[0] == '*')
10397    ++name;
10398
10399  if (name[0] == '$')
10400    ++name;
10401
10402  if (name[0] != 'r' && name[0] != 'f' && name[0] != 'R' && name[0] != 'F')
10403    return 0;
10404
10405  switch (name[1])
10406    {
10407    case '1':  case '2':
10408      return (name[2] == '\0' || (ISDIGIT (name[2]) && name[3] == '\0'));
10409
10410    case '3':
10411      return (name[2] == '\0'
10412	       || ((name[2] == '0' || name[2] == '1') && name[3] == '\0'));
10413
10414    default:
10415      return (ISDIGIT (name[1]) && name[2] == '\0');
10416    }
10417}
10418
10419/* Return the DEX number if X must be replaced by a DEX expression and 0
10420   otherwise.  */
10421
10422static int
10423unicosmk_need_dex (rtx x)
10424{
10425  struct unicosmk_dex *dex;
10426  const char *name;
10427  int i;
10428
10429  if (GET_CODE (x) != SYMBOL_REF)
10430    return 0;
10431
10432  name = XSTR (x,0);
10433  if (! unicosmk_special_name (name))
10434    return 0;
10435
10436  i = unicosmk_dex_count;
10437  for (dex = unicosmk_dex_list; dex; dex = dex->next)
10438    {
10439      if (! strcmp (name, dex->name))
10440        return i;
10441      --i;
10442    }
10443
10444  dex = (struct unicosmk_dex *) xmalloc (sizeof (struct unicosmk_dex));
10445  dex->name = name;
10446  dex->next = unicosmk_dex_list;
10447  unicosmk_dex_list = dex;
10448
10449  ++unicosmk_dex_count;
10450  return unicosmk_dex_count;
10451}
10452
10453/* Output the DEX definitions for this file.  */
10454
10455static void
10456unicosmk_output_dex (FILE *file)
10457{
10458  struct unicosmk_dex *dex;
10459  int i;
10460
10461  if (unicosmk_dex_list == NULL)
10462    return;
10463
10464  fprintf (file, "\t.dexstart\n");
10465
10466  i = unicosmk_dex_count;
10467  for (dex = unicosmk_dex_list; dex; dex = dex->next)
10468    {
10469      fprintf (file, "\tDEX (%d) = ", i);
10470      assemble_name (file, dex->name);
10471      putc ('\n', file);
10472      --i;
10473    }
10474
10475  fprintf (file, "\t.dexend\n");
10476}
10477
/* Output text to appear at the beginning of an assembler file.  */
10479
10480static void
10481unicosmk_file_start (void)
10482{
10483  int i;
10484
10485  fputs ("\t.ident\t", asm_out_file);
10486  unicosmk_output_module_name (asm_out_file);
10487  fputs ("\n\n", asm_out_file);
10488
10489  /* The Unicos/Mk assembler uses different register names. Instead of trying
10490     to support them, we simply use micro definitions.  */
10491
10492  /* CAM has different register names: rN for the integer register N and fN
10493     for the floating-point register N. Instead of trying to use these in
10494     alpha.md, we define the symbols $N and $fN to refer to the appropriate
10495     register.  */
10496
10497  for (i = 0; i < 32; ++i)
10498    fprintf (asm_out_file, "$%d <- r%d\n", i, i);
10499
10500  for (i = 0; i < 32; ++i)
10501    fprintf (asm_out_file, "$f%d <- f%d\n", i, i);
10502
10503  putc ('\n', asm_out_file);
10504
  /* The .align directive fills unused space with zeroes, which does not work
     in code sections.  We define the macro 'gcc@code@align' which uses nops
     instead.  Note that it assumes that code sections always have the
     biggest possible alignment since . refers to the current offset from
     the beginning of the section.  */
10510
10511  fputs ("\t.macro gcc@code@align n\n", asm_out_file);
10512  fputs ("gcc@n@bytes = 1 << n\n", asm_out_file);
10513  fputs ("gcc@here = . % gcc@n@bytes\n", asm_out_file);
10514  fputs ("\t.if ne, gcc@here, 0\n", asm_out_file);
10515  fputs ("\t.repeat (gcc@n@bytes - gcc@here) / 4\n", asm_out_file);
10516  fputs ("\tbis r31,r31,r31\n", asm_out_file);
10517  fputs ("\t.endr\n", asm_out_file);
10518  fputs ("\t.endif\n", asm_out_file);
10519  fputs ("\t.endm gcc@code@align\n\n", asm_out_file);
10520
10521  /* Output extern declarations which should always be visible.  */
10522  unicosmk_output_default_externs (asm_out_file);
10523
10524  /* Open a dummy section. We always need to be inside a section for the
10525     section-switching code to work correctly.
10526     ??? This should be a module id or something like that. I still have to
10527     figure out what the rules for those are.  */
10528  fputs ("\n\t.psect\t$SG00000,data\n", asm_out_file);
10529}
10530
10531/* Output text to appear at the end of an assembler file. This includes all
10532   pending extern declarations and DEX expressions.  */
10533
10534static void
10535unicosmk_file_end (void)
10536{
10537  fputs ("\t.endp\n\n", asm_out_file);
10538
10539  /* Output all pending externs.  */
10540
10541  unicosmk_output_externs (asm_out_file);
10542
10543  /* Output dex definitions used for functions whose names conflict with
10544     register names.  */
10545
10546  unicosmk_output_dex (asm_out_file);
10547
10548  fputs ("\t.end\t", asm_out_file);
10549  unicosmk_output_module_name (asm_out_file);
10550  putc ('\n', asm_out_file);
10551}
10552
10553#else
10554
10555static void
10556unicosmk_output_deferred_case_vectors (FILE *file ATTRIBUTE_UNUSED)
10557{}
10558
10559static void
10560unicosmk_gen_dsib (unsigned long *imaskP ATTRIBUTE_UNUSED)
10561{}
10562
10563static void
10564unicosmk_output_ssib (FILE * file ATTRIBUTE_UNUSED,
10565		      const char * fnname ATTRIBUTE_UNUSED)
10566{}
10567
10568rtx
10569unicosmk_add_call_info_word (rtx x ATTRIBUTE_UNUSED)
10570{
10571  return NULL_RTX;
10572}
10573
10574static int
10575unicosmk_need_dex (rtx x ATTRIBUTE_UNUSED)
10576{
10577  return 0;
10578}
10579
10580#endif /* TARGET_ABI_UNICOSMK */
10581
10582static void
10583alpha_init_libfuncs (void)
10584{
10585  if (TARGET_ABI_UNICOSMK)
10586    {
10587      /* Prevent gcc from generating calls to __divsi3.  */
10588      set_optab_libfunc (sdiv_optab, SImode, 0);
10589      set_optab_libfunc (udiv_optab, SImode, 0);
10590
10591      /* Use the functions provided by the system library
10592	 for DImode integer division.  */
10593      set_optab_libfunc (sdiv_optab, DImode, "$sldiv");
10594      set_optab_libfunc (udiv_optab, DImode, "$uldiv");
10595    }
10596  else if (TARGET_ABI_OPEN_VMS)
10597    {
10598      /* Use the VMS runtime library functions for division and
10599	 remainder.  */
10600      set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10601      set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10602      set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10603      set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10604      set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10605      set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10606      set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10607      set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10608    }
10609}
10610
10611
10612/* Initialize the GCC target structure.  */
10613#if TARGET_ABI_OPEN_VMS
10614# undef TARGET_ATTRIBUTE_TABLE
10615# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
10616# undef TARGET_SECTION_TYPE_FLAGS
10617# define TARGET_SECTION_TYPE_FLAGS vms_section_type_flags
10618#endif
10619
10620#undef TARGET_IN_SMALL_DATA_P
10621#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
10622
10623#if TARGET_ABI_UNICOSMK
10624# undef TARGET_INSERT_ATTRIBUTES
10625# define TARGET_INSERT_ATTRIBUTES unicosmk_insert_attributes
10626# undef TARGET_SECTION_TYPE_FLAGS
10627# define TARGET_SECTION_TYPE_FLAGS unicosmk_section_type_flags
10628# undef TARGET_ASM_UNIQUE_SECTION
10629# define TARGET_ASM_UNIQUE_SECTION unicosmk_unique_section
10630#undef TARGET_ASM_FUNCTION_RODATA_SECTION
10631#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
10632# undef TARGET_ASM_GLOBALIZE_LABEL
10633# define TARGET_ASM_GLOBALIZE_LABEL hook_void_FILEptr_constcharptr
10634# undef TARGET_MUST_PASS_IN_STACK
10635# define TARGET_MUST_PASS_IN_STACK unicosmk_must_pass_in_stack
10636#endif
10637
10638#undef TARGET_ASM_ALIGNED_HI_OP
10639#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10640#undef TARGET_ASM_ALIGNED_DI_OP
10641#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10642
10643/* Default unaligned ops are provided for ELF systems.  To get unaligned
10644   data for non-ELF systems, we have to turn off auto alignment.  */
10645#ifndef OBJECT_FORMAT_ELF
10646#undef TARGET_ASM_UNALIGNED_HI_OP
10647#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
10648#undef TARGET_ASM_UNALIGNED_SI_OP
10649#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
10650#undef TARGET_ASM_UNALIGNED_DI_OP
10651#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
10652#endif
10653
10654#ifdef OBJECT_FORMAT_ELF
10655#undef  TARGET_ASM_SELECT_SECTION
10656#define TARGET_ASM_SELECT_SECTION  alpha_elf_select_section
10657#undef  TARGET_ASM_UNIQUE_SECTION
10658#define TARGET_ASM_UNIQUE_SECTION  alpha_elf_unique_section
10659#undef	TARGET_ASM_SELECT_RTX_SECTION
10660#define	TARGET_ASM_SELECT_RTX_SECTION  alpha_elf_select_rtx_section
10661#undef  TARGET_SECTION_TYPE_FLAGS
10662#define TARGET_SECTION_TYPE_FLAGS  alpha_elf_section_type_flags
10663#endif
10664
10665#undef TARGET_ASM_FUNCTION_END_PROLOGUE
10666#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
10667
10668#undef TARGET_INIT_LIBFUNCS
10669#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
10670
10671#if TARGET_ABI_UNICOSMK
10672#undef TARGET_ASM_FILE_START
10673#define TARGET_ASM_FILE_START unicosmk_file_start
10674#undef TARGET_ASM_FILE_END
10675#define TARGET_ASM_FILE_END unicosmk_file_end
10676#else
10677#undef TARGET_ASM_FILE_START
10678#define TARGET_ASM_FILE_START alpha_file_start
10679#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
10680#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
10681#endif
10682
10683#undef TARGET_SCHED_ADJUST_COST
10684#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
10685#undef TARGET_SCHED_ISSUE_RATE
10686#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
10687#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
10688#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
10689  alpha_multipass_dfa_lookahead
10690
10691#undef TARGET_HAVE_TLS
10692#define TARGET_HAVE_TLS HAVE_AS_TLS
10693
10694#undef  TARGET_INIT_BUILTINS
10695#define TARGET_INIT_BUILTINS alpha_init_builtins
10696#undef  TARGET_EXPAND_BUILTIN
10697#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
10698#undef  TARGET_FOLD_BUILTIN
10699#define TARGET_FOLD_BUILTIN alpha_fold_builtin
10700
10701#undef TARGET_FUNCTION_OK_FOR_SIBCALL
10702#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
10703#undef TARGET_CANNOT_COPY_INSN_P
10704#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
10705#undef TARGET_CANNOT_FORCE_CONST_MEM
10706#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
10707
10708#if TARGET_ABI_OSF
10709#undef TARGET_ASM_OUTPUT_MI_THUNK
10710#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10711#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10712#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
10713#undef TARGET_STDARG_OPTIMIZE_HOOK
10714#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10715#endif
10716
10717#undef TARGET_RTX_COSTS
10718#define TARGET_RTX_COSTS alpha_rtx_costs
10719#undef TARGET_ADDRESS_COST
10720#define TARGET_ADDRESS_COST hook_int_rtx_0
10721
10722#undef TARGET_MACHINE_DEPENDENT_REORG
10723#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10724
10725#undef TARGET_PROMOTE_FUNCTION_ARGS
10726#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
10727#undef TARGET_PROMOTE_FUNCTION_RETURN
10728#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
10729#undef TARGET_PROMOTE_PROTOTYPES
10730#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_false
10731#undef TARGET_RETURN_IN_MEMORY
10732#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10733#undef TARGET_PASS_BY_REFERENCE
10734#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10735#undef TARGET_SETUP_INCOMING_VARARGS
10736#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10737#undef TARGET_STRICT_ARGUMENT_NAMING
10738#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10739#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10740#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10741#undef TARGET_SPLIT_COMPLEX_ARG
10742#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10743#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10744#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10745#undef TARGET_ARG_PARTIAL_BYTES
10746#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10747
10748#undef TARGET_SCALAR_MODE_SUPPORTED_P
10749#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10750#undef TARGET_VECTOR_MODE_SUPPORTED_P
10751#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10752
10753#undef TARGET_BUILD_BUILTIN_VA_LIST
10754#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10755
10756/* The Alpha architecture does not require sequential consistency.  See
10757   http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
10758   for an example of how it can be violated in practice.  */
10759#undef TARGET_RELAXED_ORDERING
10760#define TARGET_RELAXED_ORDERING true
10761
10762#undef TARGET_DEFAULT_TARGET_FLAGS
10763#define TARGET_DEFAULT_TARGET_FLAGS \
10764  (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS)
10765#undef TARGET_HANDLE_OPTION
10766#define TARGET_HANDLE_OPTION alpha_handle_option
10767
10768#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10769#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
10770#define TARGET_MANGLE_FUNDAMENTAL_TYPE alpha_mangle_fundamental_type
10771#endif
10772
10773struct gcc_target targetm = TARGET_INITIALIZER;
10774
10775
10776#include "gt-alpha.h"
10777