/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static struct machine_function * pa_init_machine_status (void);


/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct deferred_plabel GTY(())
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;


/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
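
  /* For example, -mfixed-range=fr4-fr31 marks fr4 through fr31 as
     fixed.  If the ranges given cover every floating point register,
     the check at the end of this function additionally sets
     MASK_DISABLE_FPREGS so that no FP registers are used at all.  */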

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}

void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (machine_function));
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT intval)
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
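
/* Roughly speaking (the authoritative tests are the constraint
   definitions in pa.h): 'J' accepts 14-bit signed immediates (ldo),
   'N' accepts values whose low bits are clear so that a single ldil
   can construct them, and 'K' accepts the deposit patterns recognized
   by zdepi_cint_p below.  */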

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign-extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
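
/* For example, x = 0x30 (binary 110000) is accepted: lsb_mask is 0x10
   and t = ((0x3 + 0x10) & ~0xf) = 0x10, a power of two; depositing the
   5-bit immediate 00011 shifted left by four bits reproduces x.  By
   contrast, x = 0x108 (binary 100001000) gives t = 0x18, not a power
   of two, so it is rejected.  */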

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
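
/* For example, mask = ~0xf0 (...1111 0000 1111) is accepted: ~mask is
   0xf0, and adding its least significant bit yields 0x100, a power of
   two, so depi can clear bits 4-7.  mask = ~0x90 is rejected because
   0x90 + 0x10 = 0xa0 is not a power of two.  */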

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
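
/* For example, mask = 0x70 (binary 1110000) is accepted: 0x70 + 0x10
   is 0x80, a power of two, so depi can set bits 4-6 in one insn.
   mask = 0x50 is rejected since its set bits are not contiguous.  */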

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      /* During and after reload, we need to generate a REG_LABEL note and
	 update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, orig,
						REG_NOTES (insn));
	  LABEL_NUSES (orig)++;
	}
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_const_mem (Pmode,
			 gen_rtx_LO_SUM (Pmode, tmp_reg,
					 gen_rtx_UNSPEC (Pmode,
							 gen_rtvec (1, orig),
							 UNSPEC_DLTIND14R)));

      current_function_uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask (on PA 2.0, where FP loads and stores accept 14-bit
   displacements, 0x3fff is used instead).

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (This allows more shadd insns and shifted
   indexed addressing modes to be used.)

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
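
/* As a concrete illustration of the transformation above: for a
   MODE_INT reference to (X + 100000), the mask is 0x3fff and
   100000 & 0x3fff = 1696, which is less than (mask + 1) / 2 = 8192,
   so we round down to Y = 100000 & ~0x3fff = 98304.  Z = X + 98304
   becomes the shared base and the reference becomes
   memory (Z + 1696), where 1696 fits in the 14-bit displacement of
   an ldo.  */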

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
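      /* For illustration: given (plus (plus (mult A 4) B) 20000),
	 20000 divides evenly by the scale factor 4 and the quotient
	 5000 fits in 14 bits, so we compute A' = A + 5000 and use
	 the scaled indexed address B + A' * 4.  */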
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big but can be divided evenly by shadd_const,
	     divide it and add the quotient to (reg).  This allows more
	     scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* For the HPPA, REG, PLUS (REG+CONST or REG+REG), and LO_SUM addresses
   cost 1, a bare HIGH costs 2, and everything else, including symbolic
   and PIC addresses, costs 4.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      /* This is only safe up to the beginning of life analysis.  */
      gcc_assert (!no_new_pseudos);

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
					? SFmode : DFmode),
				       XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
								   0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
						       0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	      else if (REG_POINTER (operand0)
		       && !REG_POINTER (operand1)
		       && !HARD_REGISTER_P (operand1))
		copy_reg_pointer (operand1, operand0);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  Fortran indirect argument references
		 are ignored.  */
	      if (decl
		  && !(flag_argument_noalias > 1
		       && TREE_CODE (decl) == INDIRECT_REF
		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  if (TREE_CODE (type) == ARRAY_TYPE)
		    type = get_inner_array_type (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }
1677
1678  /* Simplify the source if we need to.
1679     Note we do have to handle function labels here, even though we do
1680     not consider them legitimate constants.  Loop optimizations can
1681     call the emit_move_xxx routines with one as a source.  */
1682  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1683      || function_label_operand (operand1, mode)
1684      || (GET_CODE (operand1) == HIGH
1685	  && symbolic_operand (XEXP (operand1, 0), mode)))
1686    {
1687      int ishighonly = 0;
1688
1689      if (GET_CODE (operand1) == HIGH)
1690	{
1691	  ishighonly = 1;
1692	  operand1 = XEXP (operand1, 0);
1693	}
1694      if (symbolic_operand (operand1, mode))
1695	{
1696	  /* Argh.  The assembler and linker can't handle arithmetic
1697	     involving plabels.
1698
1699	     So we force the plabel into memory, load operand0 from
1700	     the memory location, then add in the constant part.  */
1701	  if ((GET_CODE (operand1) == CONST
1702	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1703	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1704	      || function_label_operand (operand1, mode))
1705	    {
1706	      rtx temp, const_part;
1707
1708	      /* Figure out what (if any) scratch register to use.  */
1709	      if (reload_in_progress || reload_completed)
1710		{
1711		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1712		  /* SCRATCH_REG will hold an address and maybe the actual
1713		     data.  We want it in WORD_MODE regardless of what mode it
1714		     was originally given to us.  */
1715		  scratch_reg = force_mode (word_mode, scratch_reg);
1716		}
1717	      else if (flag_pic)
1718		scratch_reg = gen_reg_rtx (Pmode);
1719
1720	      if (GET_CODE (operand1) == CONST)
1721		{
1722		  /* Save away the constant part of the expression.  */
1723		  const_part = XEXP (XEXP (operand1, 0), 1);
1724		  gcc_assert (GET_CODE (const_part) == CONST_INT);
1725
1726		  /* Force the function label into memory.  */
1727		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1728		}
1729	      else
1730		{
1731		  /* No constant part.  */
1732		  const_part = NULL_RTX;
1733
1734		  /* Force the function label into memory.  */
1735		  temp = force_const_mem (mode, operand1);
1736		}
1737
1739	      /* Get the address of the memory location.  PIC-ify it if
1740		 necessary.  */
1741	      temp = XEXP (temp, 0);
1742	      if (flag_pic)
1743		temp = legitimize_pic_address (temp, mode, scratch_reg);
1744
1745	      /* Put the address of the memory location into our destination
1746		 register.  */
1747	      operands[1] = temp;
1748	      emit_move_sequence (operands, mode, scratch_reg);
1749
1750	      /* Now load from the memory location into our destination
1751		 register.  */
1752	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1753	      emit_move_sequence (operands, mode, scratch_reg);
1754
1755	      /* And add back in the constant part.  */
1756	      if (const_part != NULL_RTX)
1757		expand_inc (operand0, const_part);
1758
1759	      return 1;
1760	    }
1761
1762	  if (flag_pic)
1763	    {
1764	      rtx temp;
1765
1766	      if (reload_in_progress || reload_completed)
1767		{
1768		  temp = scratch_reg ? scratch_reg : operand0;
1769		  /* TEMP will hold an address and maybe the actual
1770		     data.  We want it in WORD_MODE regardless of what mode it
1771		     was originally given to us.  */
1772		  temp = force_mode (word_mode, temp);
1773		}
1774	      else
1775		temp = gen_reg_rtx (Pmode);
1776
1777	      /* (const (plus (symbol) (const_int))) must be forced to
1778		 memory during/after reload if the const_int will not fit
1779		 in 14 bits.  */
1780	      if (GET_CODE (operand1) == CONST
1781		       && GET_CODE (XEXP (operand1, 0)) == PLUS
1782		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1783		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1784		       && (reload_completed || reload_in_progress)
1785		       && flag_pic)
1786		{
1787		  rtx const_mem = force_const_mem (mode, operand1);
1788		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1789							mode, temp);
1790		  operands[1] = replace_equiv_address (const_mem, operands[1]);
1791		  emit_move_sequence (operands, mode, temp);
1792		}
1793	      else
1794		{
1795		  operands[1] = legitimize_pic_address (operand1, mode, temp);
1796		  if (REG_P (operand0) && REG_P (operands[1]))
1797		    copy_reg_pointer (operand0, operands[1]);
1798		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1799		}
1800	    }
1801	  /* On the HPPA, references to data space are supposed to use dp,
1802	     register 27, but showing it in the RTL inhibits various cse
1803	     and loop optimizations.  */
1804	  else
1805	    {
1806	      rtx temp, set;
1807
1808	      if (reload_in_progress || reload_completed)
1809		{
1810		  temp = scratch_reg ? scratch_reg : operand0;
1811		  /* TEMP will hold an address and maybe the actual
1812		     data.  We want it in WORD_MODE regardless of what mode it
1813		     was originally given to us.  */
1814		  temp = force_mode (word_mode, temp);
1815		}
1816	      else
1817		temp = gen_reg_rtx (mode);
1818
1819	      /* Loading a SYMBOL_REF into a register makes that register
1820		 safe to be used as the base in an indexed address.
1821
1822		 Don't mark hard registers though.  That loses.  */
1823	      if (GET_CODE (operand0) == REG
1824		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1825		mark_reg_pointer (operand0, BITS_PER_UNIT);
1826	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1827		mark_reg_pointer (temp, BITS_PER_UNIT);
1828
1829	      if (ishighonly)
1830		set = gen_rtx_SET (mode, operand0, temp);
1831	      else
1832		set = gen_rtx_SET (VOIDmode,
1833				   operand0,
1834				   gen_rtx_LO_SUM (mode, temp, operand1));
1835
1836	      emit_insn (gen_rtx_SET (VOIDmode,
1837				      temp,
1838				      gen_rtx_HIGH (mode, operand1)));
1839	      emit_insn (set);
1840
1841	    }
1842	  return 1;
1843	}
1844      else if (pa_tls_referenced_p (operand1))
1845	{
1846	  rtx tmp = operand1;
1847	  rtx addend = NULL;
1848
1849	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1850	    {
1851	      addend = XEXP (XEXP (tmp, 0), 1);
1852	      tmp = XEXP (XEXP (tmp, 0), 0);
1853	    }
1854
1855	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1856	  tmp = legitimize_tls_address (tmp);
1857	  if (addend)
1858	    {
1859	      tmp = gen_rtx_PLUS (mode, tmp, addend);
1860	      tmp = force_operand (tmp, operands[0]);
1861	    }
1862	  operands[1] = tmp;
1863	}
1864      else if (GET_CODE (operand1) != CONST_INT
1865	       || !cint_ok_for_move (INTVAL (operand1)))
1866	{
1867	  rtx insn, temp;
1868	  rtx op1 = operand1;
1869	  HOST_WIDE_INT value = 0;
1870	  HOST_WIDE_INT insv = 0;
1871	  int insert = 0;
1872
1873	  if (GET_CODE (operand1) == CONST_INT)
1874	    value = INTVAL (operand1);
1875
1876	  if (TARGET_64BIT
1877	      && GET_CODE (operand1) == CONST_INT
1878	      && HOST_BITS_PER_WIDE_INT > 32
1879	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1880	    {
1881	      HOST_WIDE_INT nval;
1882
1883	      /* Extract the low order 32 bits of the value and sign extend.
1884		 If the new value is the same as the original value, we can
1885		 use the original value as-is.  If the new value is
1886		 different, we use it and insert the most-significant
1887		 32 bits of the original value into the final result.  */
1888	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1889		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
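	      /* Illustrative example: for VALUE 0x123456789, the low-order
		 32 bits are 0x23456789 with bit 31 clear, so NVAL is
		 0x23456789; it differs from VALUE, so the high-order bits
		 (0x1 here) must be inserted below.  */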
1890	      if (value != nval)
1891		{
1892#if HOST_BITS_PER_WIDE_INT > 32
1893		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1894#endif
1895		  insert = 1;
1896		  value = nval;
1897		  operand1 = GEN_INT (nval);
1898		}
1899	    }
1900
1901	  if (reload_in_progress || reload_completed)
1902	    temp = scratch_reg ? scratch_reg : operand0;
1903	  else
1904	    temp = gen_reg_rtx (mode);
1905
1906	  /* We don't directly split DImode constants on 32-bit targets
1907	     because PLUS uses an 11-bit immediate and the insn sequence
1908	     generated is not as efficient as the one using HIGH/LO_SUM.  */
1909	  if (GET_CODE (operand1) == CONST_INT
1910	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1911	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1912	      && !insert)
1913	    {
1914	      /* Directly break constant into high and low parts.  This
1915		 provides better optimization opportunities because various
1916		 passes recognize constants split with PLUS but not LO_SUM.
1917		 We use a 14-bit signed low part except when the addition
1918		 of 0x4000 to the high part might change the sign of the
1919		 high part.  */
1920	      HOST_WIDE_INT low = value & 0x3fff;
1921	      HOST_WIDE_INT high = value & ~ 0x3fff;
1922
1923	      if (low >= 0x2000)
1924		{
1925		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1926		    high += 0x2000;
1927		  else
1928		    high += 0x4000;
1929		}
1930
1931	      low = value - high;
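	      /* Illustrative example: VALUE 0x12343fff gives LOW 0x3fff,
		 which is >= 0x2000, so HIGH becomes 0x12344000 and LOW -1;
		 both parts then fit the ldil and 14-bit ldo fields.  */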
1932
1933	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1934	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1935	    }
1936	  else
1937	    {
1938	      emit_insn (gen_rtx_SET (VOIDmode, temp,
1939				      gen_rtx_HIGH (mode, operand1)));
1940	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1941	    }
1942
1943	  insn = emit_move_insn (operands[0], operands[1]);
1944
1945	  /* Now insert the most significant 32 bits of the value
1946	     into the register.  When we don't have a second register
1947	     available, it could take up to nine instructions to load
1948	     a 64-bit integer constant.  Prior to reload, we force
1949	     constants that would take more than three instructions
1950	     to load to the constant pool.  During and after reload,
1951	     we have to handle all possible values.  */
1952	  if (insert)
1953	    {
1954	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1955		 register and the value to be inserted is outside the
1956		 range that can be loaded with three depdi instructions.  */
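	      /* The +/-16384 bound corresponds to the 15-bit signed
		 range that three sign-extending 5-bit depdi fields can
		 cover.  */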
1957	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
1958		{
1959		  operand1 = GEN_INT (insv);
1960
1961		  emit_insn (gen_rtx_SET (VOIDmode, temp,
1962					  gen_rtx_HIGH (mode, operand1)));
1963		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
1964		  emit_insn (gen_insv (operand0, GEN_INT (32),
1965				       const0_rtx, temp));
1966		}
1967	      else
1968		{
1969		  int len = 5, pos = 27;
1970
1971		  /* Insert the bits using the depdi instruction.  */
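		  /* Each depdi immediate is a signed 5-bit value that is
		     sign-extended across the deposited field, so the loop
		     widens a field while the following bits match its
		     sign; long runs of zeros or ones thus cost no extra
		     insns.  */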
1972		  while (pos >= 0)
1973		    {
1974		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
1975		      HOST_WIDE_INT sign = v5 < 0;
1976
1977		      /* Left extend the insertion.  */
1978		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
1979		      while (pos > 0 && (insv & 1) == sign)
1980			{
1981			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
1982			  len += 1;
1983			  pos -= 1;
1984			}
1985
1986		      emit_insn (gen_insv (operand0, GEN_INT (len),
1987					   GEN_INT (pos), GEN_INT (v5)));
1988
1989		      len = pos > 0 && pos < 5 ? pos : 5;
1990		      pos -= len;
1991		    }
1992		}
1993	    }
1994
1995	  REG_NOTES (insn)
1996	    = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
1997
1998	  return 1;
1999	}
2000    }
2001  /* Now have insn-emit do whatever it normally does.  */
2002  return 0;
2003}
2004
2005/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2006   it will need a link/runtime reloc).  */
2007
2008int
2009reloc_needed (tree exp)
2010{
2011  int reloc = 0;
2012
2013  switch (TREE_CODE (exp))
2014    {
2015    case ADDR_EXPR:
2016      return 1;
2017
2018    case PLUS_EXPR:
2019    case MINUS_EXPR:
2020      reloc = reloc_needed (TREE_OPERAND (exp, 0));
2021      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2022      break;
2023
2024    case NOP_EXPR:
2025    case CONVERT_EXPR:
2026    case NON_LVALUE_EXPR:
2027      reloc = reloc_needed (TREE_OPERAND (exp, 0));
2028      break;
2029
2030    case CONSTRUCTOR:
2031      {
2032	tree value;
2033	unsigned HOST_WIDE_INT ix;
2034
2035	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2036	  if (value)
2037	    reloc |= reloc_needed (value);
2038      }
2039      break;
2040
2041    case ERROR_MARK:
2042      break;
2043
2044    default:
2045      break;
2046    }
2047  return reloc;
2048}
2049
2050/* Does operand (which is a symbolic_operand) live in text space?
2051   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2052   will be true.  */
2053
2054int
2055read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2056{
2057  if (GET_CODE (operand) == CONST)
2058    operand = XEXP (XEXP (operand, 0), 0);
2059  if (flag_pic)
2060    {
2061      if (GET_CODE (operand) == SYMBOL_REF)
2062	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2063    }
2064  else
2065    {
2066      if (GET_CODE (operand) == SYMBOL_REF)
2067	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2068    }
2069  return 1;
2070}
2071
2072
2073/* Return the best assembler insn template
2074   for moving operands[1] into operands[0] as a fullword.  */
2075const char *
2076singlemove_string (rtx *operands)
2077{
2078  HOST_WIDE_INT intval;
2079
2080  if (GET_CODE (operands[0]) == MEM)
2081    return "stw %r1,%0";
2082  if (GET_CODE (operands[1]) == MEM)
2083    return "ldw %1,%0";
2084  if (GET_CODE (operands[1]) == CONST_DOUBLE)
2085    {
2086      long i;
2087      REAL_VALUE_TYPE d;
2088
2089      gcc_assert (GET_MODE (operands[1]) == SFmode);
2090
2091      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2092	 bit pattern.  */
2093      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2094      REAL_VALUE_TO_TARGET_SINGLE (d, i);
2095
2096      operands[1] = GEN_INT (i);
2097      /* Fall through to CONST_INT case.  */
2098    }
2099  if (GET_CODE (operands[1]) == CONST_INT)
2100    {
2101      intval = INTVAL (operands[1]);
2102
2103      if (VAL_14_BITS_P (intval))
2104	return "ldi %1,%0";
2105      else if ((intval & 0x7ff) == 0)
2106	return "ldil L'%1,%0";
2107      else if (zdepi_cint_p (intval))
2108	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2109      else
2110	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2111    }
2112  return "copy %1,%0";
2113}
2114
2115
2116/* Compute position (in OP[1]) and width (in OP[2])
2117   useful for copying IMM to a register using the zdepi
2118   instructions.  Store the immediate value to insert in OP[0].  */
2119static void
2120compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2121{
2122  int lsb, len;
2123
2124  /* Find the least significant set bit in IMM.  */
2125  for (lsb = 0; lsb < 32; lsb++)
2126    {
2127      if ((imm & 1) != 0)
2128        break;
2129      imm >>= 1;
2130    }
2131
2132  /* Choose variants based on *sign* of the 5-bit field.  */
2133  if ((imm & 0x10) == 0)
2134    len = (lsb <= 28) ? 4 : 32 - lsb;
2135  else
2136    {
2137      /* Find the width of the bitstring in IMM.  */
2138      for (len = 5; len < 32; len++)
2139	{
2140	  if ((imm & (1 << len)) == 0)
2141	    break;
2142	}
2143
2144      /* Sign extend IMM as a 5-bit value.  */
2145      imm = (imm & 0xf) - 0x10;
2146    }
2147
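  /* Illustrative example: IMM 0x3c00 yields lsb 10 and the positive
     4-bit field 0xf, so OP[] becomes {0xf, 21, 4}; "zdepi 15,21,4"
     then rebuilds the original constant.  */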
2148  op[0] = imm;
2149  op[1] = 31 - lsb;
2150  op[2] = len;
2151}
2152
2153/* Compute position (in OP[1]) and width (in OP[2])
2154   useful for copying IMM to a register using the depdi,z
2155   instructions.  Store the immediate value to insert in OP[0].  */
2156void
2157compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2158{
2159  HOST_WIDE_INT lsb, len;
2160
2161  /* Find the least significant set bit in IMM.  */
2162  for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2163    {
2164      if ((imm & 1) != 0)
2165        break;
2166      imm >>= 1;
2167    }
2168
2169  /* Choose variants based on *sign* of the 5-bit field.  */
2170  if ((imm & 0x10) == 0)
2171    len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2172	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2173  else
2174    {
2175      /* Find the width of the bitstring in IMM.  */
2176      for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2177	{
2178	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2179	    break;
2180	}
2181
2182      /* Sign extend IMM as a 5-bit value.  */
2183      imm = (imm & 0xf) - 0x10;
2184    }
2185
2186  op[0] = imm;
2187  op[1] = 63 - lsb;
2188  op[2] = len;
2189}
2190
2191/* Output assembler code to perform a doubleword move insn
2192   with operands OPERANDS.  */
2193
2194const char *
2195output_move_double (rtx *operands)
2196{
2197  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2198  rtx latehalf[2];
2199  rtx addreg0 = 0, addreg1 = 0;
2200
2201  /* First classify both operands.  */
2202
2203  if (REG_P (operands[0]))
2204    optype0 = REGOP;
2205  else if (offsettable_memref_p (operands[0]))
2206    optype0 = OFFSOP;
2207  else if (GET_CODE (operands[0]) == MEM)
2208    optype0 = MEMOP;
2209  else
2210    optype0 = RNDOP;
2211
2212  if (REG_P (operands[1]))
2213    optype1 = REGOP;
2214  else if (CONSTANT_P (operands[1]))
2215    optype1 = CNSTOP;
2216  else if (offsettable_memref_p (operands[1]))
2217    optype1 = OFFSOP;
2218  else if (GET_CODE (operands[1]) == MEM)
2219    optype1 = MEMOP;
2220  else
2221    optype1 = RNDOP;
2222
2223  /* Check for cases that the operand constraints are not supposed
2224     to allow.  */
2225  gcc_assert (optype0 == REGOP || optype1 == REGOP);
2226
2227  /* Handle copies between general and floating registers.  */
2228
2229  if (optype0 == REGOP && optype1 == REGOP
2230      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2231    {
2232      if (FP_REG_P (operands[0]))
2233	{
2234	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2235	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2236	  return "{fldds|fldd} -16(%%sp),%0";
2237	}
2238      else
2239	{
2240	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2241	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2242	  return "{ldws|ldw} -12(%%sp),%R0";
2243	}
2244    }
2245
2246  /* Handle auto-decrementing and auto-incrementing loads and stores
2247     specifically, since the structure of the function doesn't work
2248     for them without major modification.  Do it better once we teach
2249     this port about the general inc/dec addressing of the PA.
2250     (This was written by tege.  Chide him if it doesn't work.)  */
2251
2252  if (optype0 == MEMOP)
2253    {
2254      /* We have to output the address syntax ourselves, since print_operand
2255	 doesn't deal with the addresses we want to use.  Fix this later.  */
2256
2257      rtx addr = XEXP (operands[0], 0);
2258      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2259	{
2260	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2261
2262	  operands[0] = XEXP (addr, 0);
2263	  gcc_assert (GET_CODE (operands[1]) == REG
2264		      && GET_CODE (operands[0]) == REG);
2265
2266	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2267
2268	  /* No overlap between high target register and address
2269	     register.  (We do this in a non-obvious way to
2270	     save a register file writeback)  */
2271	  if (GET_CODE (addr) == POST_INC)
2272	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2273	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2274	}
2275      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2276	{
2277	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2278
2279	  operands[0] = XEXP (addr, 0);
2280	  gcc_assert (GET_CODE (operands[1]) == REG
2281		      && GET_CODE (operands[0]) == REG);
2282
2283	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2284	  /* No overlap between high target register and address
2285	     register.  (We do this in a non-obvious way to save a
2286	     register file writeback)  */
2287	  if (GET_CODE (addr) == PRE_INC)
2288	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2289	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2290	}
2291    }
2292  if (optype1 == MEMOP)
2293    {
2294      /* We have to output the address syntax ourselves, since print_operand
2295	 doesn't deal with the addresses we want to use.  Fix this later.  */
2296
2297      rtx addr = XEXP (operands[1], 0);
2298      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2299	{
2300	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2301
2302	  operands[1] = XEXP (addr, 0);
2303	  gcc_assert (GET_CODE (operands[0]) == REG
2304		      && GET_CODE (operands[1]) == REG);
2305
2306	  if (!reg_overlap_mentioned_p (high_reg, addr))
2307	    {
2308	      /* No overlap between high target register and address
2309		 register.  (We do this in a non-obvious way to
2310		 save a register file writeback)  */
2311	      if (GET_CODE (addr) == POST_INC)
2312		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2313	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2314	    }
2315	  else
2316	    {
2317	      /* This is an undefined situation.  We should load into the
2318		 address register *and* update that register.  Probably
2319		 we don't need to handle this at all.  */
2320	      if (GET_CODE (addr) == POST_INC)
2321		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2322	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2323	    }
2324	}
2325      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2326	{
2327	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2328
2329	  operands[1] = XEXP (addr, 0);
2330	  gcc_assert (GET_CODE (operands[0]) == REG
2331		      && GET_CODE (operands[1]) == REG);
2332
2333	  if (!reg_overlap_mentioned_p (high_reg, addr))
2334	    {
2335	      /* No overlap between high target register and address
2336		 register.  (We do this in a non-obvious way to
2337		 save a register file writeback)  */
2338	      if (GET_CODE (addr) == PRE_INC)
2339		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2340	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2341	    }
2342	  else
2343	    {
2344	      /* This is an undefined situation.  We should load into the
2345		 address register *and* update that register.  Probably
2346		 we don't need to handle this at all.  */
2347	      if (GET_CODE (addr) == PRE_INC)
2348		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2349	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2350	    }
2351	}
2352      else if (GET_CODE (addr) == PLUS
2353	       && GET_CODE (XEXP (addr, 0)) == MULT)
2354	{
2355	  rtx xoperands[4];
2356	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2357
2358	  if (!reg_overlap_mentioned_p (high_reg, addr))
2359	    {
2360	      xoperands[0] = high_reg;
2361	      xoperands[1] = XEXP (addr, 1);
2362	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2363	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2364	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2365			       xoperands);
2366	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2367	    }
2368	  else
2369	    {
2370	      xoperands[0] = high_reg;
2371	      xoperands[1] = XEXP (addr, 1);
2372	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2373	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2374	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2375			       xoperands);
2376	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2377	    }
2378	}
2379    }
2380
2381  /* If an operand is an unoffsettable memory ref, find a register
2382     we can increment temporarily to make it refer to the second word.  */
2383
2384  if (optype0 == MEMOP)
2385    addreg0 = find_addr_reg (XEXP (operands[0], 0));
2386
2387  if (optype1 == MEMOP)
2388    addreg1 = find_addr_reg (XEXP (operands[1], 0));
2389
2390  /* Ok, we can do one word at a time.  Normally we do the
2391     low-numbered word first; when the first move would clobber the
2392     source of the second (see below), the order is reversed.
2393     In either case, set up in LATEHALF the operands to use
2394     for the high-numbered word and in some cases alter the
2395     operands in OPERANDS to be suitable for the low-numbered word.  */
2396
2397  if (optype0 == REGOP)
2398    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2399  else if (optype0 == OFFSOP)
2400    latehalf[0] = adjust_address (operands[0], SImode, 4);
2401  else
2402    latehalf[0] = operands[0];
2403
2404  if (optype1 == REGOP)
2405    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2406  else if (optype1 == OFFSOP)
2407    latehalf[1] = adjust_address (operands[1], SImode, 4);
2408  else if (optype1 == CNSTOP)
2409    split_double (operands[1], &operands[1], &latehalf[1]);
2410  else
2411    latehalf[1] = operands[1];
2412
2413  /* If the first move would clobber the source of the second one,
2414     do them in the other order.
2415
2416     This can happen in two cases:
2417
2418	mem -> register where the first half of the destination register
2419	is the same register used in the memory's address.  Reload
2420	can create such insns.
2421
2422	mem in this case will be either register indirect or register
2423	indirect plus a valid offset.
2424
2425	register -> register move where REGNO(dst) == REGNO(src + 1)
2426	someone (Tim/Tege?) claimed this can happen for parameter loads.
2427
2428     Handle mem -> register case first.  */
2429  if (optype0 == REGOP
2430      && (optype1 == MEMOP || optype1 == OFFSOP)
2431      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2432			    operands[1], 0))
2433    {
2434      /* Do the late half first.  */
2435      if (addreg1)
2436	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2437      output_asm_insn (singlemove_string (latehalf), latehalf);
2438
2439      /* Then clobber.  */
2440      if (addreg1)
2441	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2442      return singlemove_string (operands);
2443    }
2444
2445  /* Now handle register -> register case.  */
2446  if (optype0 == REGOP && optype1 == REGOP
2447      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2448    {
2449      output_asm_insn (singlemove_string (latehalf), latehalf);
2450      return singlemove_string (operands);
2451    }
2452
2453  /* Normal case: do the two words, low-numbered first.  */
2454
2455  output_asm_insn (singlemove_string (operands), operands);
2456
2457  /* Make any unoffsettable addresses point at high-numbered word.  */
2458  if (addreg0)
2459    output_asm_insn ("ldo 4(%0),%0", &addreg0);
2460  if (addreg1)
2461    output_asm_insn ("ldo 4(%0),%0", &addreg1);
2462
2463  /* Do that word.  */
2464  output_asm_insn (singlemove_string (latehalf), latehalf);
2465
2466  /* Undo the adds we just did.  */
2467  if (addreg0)
2468    output_asm_insn ("ldo -4(%0),%0", &addreg0);
2469  if (addreg1)
2470    output_asm_insn ("ldo -4(%0),%0", &addreg1);
2471
2472  return "";
2473}
2474
2475const char *
2476output_fp_move_double (rtx *operands)
2477{
2478  if (FP_REG_P (operands[0]))
2479    {
2480      if (FP_REG_P (operands[1])
2481	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2482	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2483      else
2484	output_asm_insn ("fldd%F1 %1,%0", operands);
2485    }
2486  else if (FP_REG_P (operands[1]))
2487    {
2488      output_asm_insn ("fstd%F0 %1,%0", operands);
2489    }
2490  else
2491    {
2492      rtx xoperands[2];
2493
2494      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2495
2496      /* This is a pain.  You have to be prepared to deal with an
2497	 arbitrary address here, including pre/post increment/decrement,
2498	 so we avoid this in the MD.  */
2500      gcc_assert (GET_CODE (operands[0]) == REG);
2501
2502      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2503      xoperands[0] = operands[0];
2504      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2505    }
2506  return "";
2507}
2508
2509/* Return a REG that occurs in ADDR with coefficient 1.
2510   ADDR can be effectively incremented by incrementing REG.  */
2511
2512static rtx
2513find_addr_reg (rtx addr)
2514{
2515  while (GET_CODE (addr) == PLUS)
2516    {
2517      if (GET_CODE (XEXP (addr, 0)) == REG)
2518	addr = XEXP (addr, 0);
2519      else if (GET_CODE (XEXP (addr, 1)) == REG)
2520	addr = XEXP (addr, 1);
2521      else if (CONSTANT_P (XEXP (addr, 0)))
2522	addr = XEXP (addr, 1);
2523      else if (CONSTANT_P (XEXP (addr, 1)))
2524	addr = XEXP (addr, 0);
2525      else
2526	gcc_unreachable ();
2527    }
2528  gcc_assert (GET_CODE (addr) == REG);
2529  return addr;
2530}
2531
2532/* Emit code to perform a block move.
2533
2534   OPERANDS[0] is the destination pointer as a REG, clobbered.
2535   OPERANDS[1] is the source pointer as a REG, clobbered.
2536   OPERANDS[2] is a register for temporary storage.
2537   OPERANDS[3] is a register for temporary storage.
2538   OPERANDS[4] is the size as a CONST_INT.
2539   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2540   OPERANDS[6] is another temporary register.  */
2541
2542const char *
2543output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2544{
2545  HOST_WIDE_INT align = INTVAL (operands[5]);
2546  unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2547
2548  /* We can't move more than a word at a time because the PA has no
2549     integer move insns wider than a word.  (Could use fp mem ops?)  */
2550  if (align > (TARGET_64BIT ? 8 : 4))
2551    align = (TARGET_64BIT ? 8 : 4);
2552
2553  /* Note that we know each loop below will execute at least twice
2554     (else we would have open-coded the copy).  */
2555  switch (align)
2556    {
2557      case 8:
2558	/* Pre-adjust the loop counter.  */
2559	operands[4] = GEN_INT (n_bytes - 16);
2560	output_asm_insn ("ldi %4,%2", operands);
2561
2562	/* Copying loop.  */
2563	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2564	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2565	output_asm_insn ("std,ma %3,8(%0)", operands);
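	/* The addib decrements the count and, while its delay slot
	   executes the second store, branches back 12 bytes to the
	   first load if the count is still nonnegative.  */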
2566	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2567	output_asm_insn ("std,ma %6,8(%0)", operands);
2568
2569	/* Handle the residual.  There could be up to 15 bytes of
2570	   residual to copy!  */
2571	if (n_bytes % 16 != 0)
2572	  {
2573	    operands[4] = GEN_INT (n_bytes % 8);
2574	    if (n_bytes % 16 >= 8)
2575	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2576	    if (n_bytes % 8 != 0)
2577	      output_asm_insn ("ldd 0(%1),%6", operands);
2578	    if (n_bytes % 16 >= 8)
2579	      output_asm_insn ("std,ma %3,8(%0)", operands);
2580	    if (n_bytes % 8 != 0)
2581	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2582	  }
2583	return "";
2584
2585      case 4:
2586	/* Pre-adjust the loop counter.  */
2587	operands[4] = GEN_INT (n_bytes - 8);
2588	output_asm_insn ("ldi %4,%2", operands);
2589
2590	/* Copying loop.  */
2591	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2592	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2593	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2594	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2595	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2596
2597	/* Handle the residual.  There could be up to 7 bytes of
2598	   residual to copy!  */
2599	if (n_bytes % 8 != 0)
2600	  {
2601	    operands[4] = GEN_INT (n_bytes % 4);
2602	    if (n_bytes % 8 >= 4)
2603	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2604	    if (n_bytes % 4 != 0)
2605	      output_asm_insn ("ldw 0(%1),%6", operands);
2606	    if (n_bytes % 8 >= 4)
2607	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2608	    if (n_bytes % 4 != 0)
2609	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2610	  }
2611	return "";
2612
2613      case 2:
2614	/* Pre-adjust the loop counter.  */
2615	operands[4] = GEN_INT (n_bytes - 4);
2616	output_asm_insn ("ldi %4,%2", operands);
2617
2618	/* Copying loop.  */
2619	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2620	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2621	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2622	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2623	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2624
2625	/* Handle the residual.  */
2626	if (n_bytes % 4 != 0)
2627	  {
2628	    if (n_bytes % 4 >= 2)
2629	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2630	    if (n_bytes % 2 != 0)
2631	      output_asm_insn ("ldb 0(%1),%6", operands);
2632	    if (n_bytes % 4 >= 2)
2633	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2634	    if (n_bytes % 2 != 0)
2635	      output_asm_insn ("stb %6,0(%0)", operands);
2636	  }
2637	return "";
2638
2639      case 1:
2640	/* Pre-adjust the loop counter.  */
2641	operands[4] = GEN_INT (n_bytes - 2);
2642	output_asm_insn ("ldi %4,%2", operands);
2643
2644	/* Copying loop.  */
2645	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2646	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2647	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2648	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2649	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2650
2651	/* Handle the residual.  */
2652	if (n_bytes % 2 != 0)
2653	  {
2654	    output_asm_insn ("ldb 0(%1),%3", operands);
2655	    output_asm_insn ("stb %3,0(%0)", operands);
2656	  }
2657	return "";
2658
2659      default:
2660	gcc_unreachable ();
2661    }
2662}
2663
2664/* Count the number of insns necessary to handle this block move.
2665
2666   Basic structure is the same as output_block_move, except that we
2667   count insns rather than emit them.  */
2668
2669static int
2670compute_movmem_length (rtx insn)
2671{
2672  rtx pat = PATTERN (insn);
2673  unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2674  unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2675  unsigned int n_insns = 0;
2676
2677  /* We can't move more than a word at a time because the PA has no
2678     integer move insns wider than a word.  (Could use fp mem ops?)  */
2679  if (align > (TARGET_64BIT ? 8 : 4))
2680    align = (TARGET_64BIT ? 8 : 4);
2681
2682  /* The basic copying loop.  */
2683  n_insns = 6;
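  /* That is, the ldi plus the five-insn copying loop emitted by
     output_block_move; each residual piece below costs a load/store
     pair.  */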
2684
2685  /* Residuals.  */
2686  if (n_bytes % (2 * align) != 0)
2687    {
2688      if ((n_bytes % (2 * align)) >= align)
2689	n_insns += 2;
2690
2691      if ((n_bytes % align) != 0)
2692	n_insns += 2;
2693    }
2694
2695  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2696  return n_insns * 4;
2697}
2698
2699/* Emit code to perform a block clear.
2700
2701   OPERANDS[0] is the destination pointer as a REG, clobbered.
2702   OPERANDS[1] is a register for temporary storage.
2703   OPERANDS[2] is the size as a CONST_INT.
2704   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2705
2706const char *
2707output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2708{
2709  HOST_WIDE_INT align = INTVAL (operands[3]);
2710  unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
2711
2712  /* We can't clear more than a word at a time because the PA has no
2713     integer move insns wider than a word.  */
2714  if (align > (TARGET_64BIT ? 8 : 4))
2715    align = (TARGET_64BIT ? 8 : 4);
2716
2717  /* Note that we know each loop below will execute at least twice
2718     (else we would have open-coded the clear).  */
2719  switch (align)
2720    {
2721      case 8:
2722	/* Pre-adjust the loop counter.  */
2723	operands[2] = GEN_INT (n_bytes - 16);
2724	output_asm_insn ("ldi %2,%1", operands);
2725
2726	/* Loop.  */
2727	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2728	output_asm_insn ("addib,>= -16,%1,.-4", operands);
2729	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2730
2731	/* Handle the residual.  There could be up to 15 bytes of
2732	   residual to clear!  */
2733	if (n_bytes % 16 != 0)
2734	  {
2735	    operands[2] = GEN_INT (n_bytes % 8);
2736	    if (n_bytes % 16 >= 8)
2737	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
2738	    if (n_bytes % 8 != 0)
2739	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2740	  }
2741	return "";
2742
2743      case 4:
2744	/* Pre-adjust the loop counter.  */
2745	operands[2] = GEN_INT (n_bytes - 8);
2746	output_asm_insn ("ldi %2,%1", operands);
2747
2748	/* Loop.  */
2749	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2750	output_asm_insn ("addib,>= -8,%1,.-4", operands);
2751	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2752
2753	/* Handle the residual.  There could be up to 7 bytes of
2754	   residual to clear!  */
2755	if (n_bytes % 8 != 0)
2756	  {
2757	    operands[2] = GEN_INT (n_bytes % 4);
2758	    if (n_bytes % 8 >= 4)
2759	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2760	    if (n_bytes % 4 != 0)
2761	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2762	  }
2763	return "";
2764
2765      case 2:
2766	/* Pre-adjust the loop counter.  */
2767	operands[2] = GEN_INT (n_bytes - 4);
2768	output_asm_insn ("ldi %2,%1", operands);
2769
2770	/* Loop.  */
2771	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2772	output_asm_insn ("addib,>= -4,%1,.-4", operands);
2773	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2774
2775	/* Handle the residual.  */
2776	if (n_bytes % 4 != 0)
2777	  {
2778	    if (n_bytes % 4 >= 2)
2779	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2780	    if (n_bytes % 2 != 0)
2781	      output_asm_insn ("stb %%r0,0(%0)", operands);
2782	  }
2783	return "";
2784
2785      case 1:
2786	/* Pre-adjust the loop counter.  */
2787	operands[2] = GEN_INT (n_bytes - 2);
2788	output_asm_insn ("ldi %2,%1", operands);
2789
2790	/* Loop.  */
2791	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2792	output_asm_insn ("addib,>= -2,%1,.-4", operands);
2793	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2794
2795	/* Handle the residual.  */
2796	if (n_bytes % 2 != 0)
2797	  output_asm_insn ("stb %%r0,0(%0)", operands);
2798
2799	return "";
2800
2801      default:
2802	gcc_unreachable ();
2803    }
2804}
2805
2806/* Count the number of insns necessary to handle this block clear.
2807
2808   Basic structure is the same as output_block_clear, except that we
2809   count insns rather than emit them.  */
2810
2811static int
2812compute_clrmem_length (rtx insn)
2813{
2814  rtx pat = PATTERN (insn);
2815  unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2816  unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2817  unsigned int n_insns = 0;
2818
2819  /* We can't clear more than a word at a time because the PA has no
2820     integer move insns wider than a word.  */
2821  if (align > (TARGET_64BIT ? 8 : 4))
2822    align = (TARGET_64BIT ? 8 : 4);
2823
2824  /* The basic loop.  */
2825  n_insns = 4;
2826
2827  /* Residuals.  */
2828  if (n_bytes % (2 * align) != 0)
2829    {
2830      if ((n_bytes % (2 * align)) >= align)
2831	n_insns++;
2832
2833      if ((n_bytes % align) != 0)
2834	n_insns++;
2835    }
2836
2837  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2838  return n_insns * 4;
2839}
2840
2841
2842const char *
2843output_and (rtx *operands)
2844{
2845  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2846    {
2847      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2848      int ls0, ls1, ms0, p, len;
2849
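      /* MASK must be ones, a single run of zeros, then ones again
	 (either run of ones possibly empty); the ms0 == 32 assertion
	 below checks this.  All-low-order ones are extracted with
	 extru, otherwise the zero run is cleared in place with depi.  */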
2850      for (ls0 = 0; ls0 < 32; ls0++)
2851	if ((mask & (1 << ls0)) == 0)
2852	  break;
2853
2854      for (ls1 = ls0; ls1 < 32; ls1++)
2855	if ((mask & (1 << ls1)) != 0)
2856	  break;
2857
2858      for (ms0 = ls1; ms0 < 32; ms0++)
2859	if ((mask & (1 << ms0)) == 0)
2860	  break;
2861
2862      gcc_assert (ms0 == 32);
2863
2864      if (ls1 == 32)
2865	{
2866	  len = ls0;
2867
2868	  gcc_assert (len);
2869
2870	  operands[2] = GEN_INT (len);
2871	  return "{extru|extrw,u} %1,31,%2,%0";
2872	}
2873      else
2874	{
2875	  /* We could use this `depi' for the case above as well, but `depi'
2876	     requires one more register file access than an `extru'.  */
2877
2878	  p = 31 - ls0;
2879	  len = ls1 - ls0;
2880
2881	  operands[2] = GEN_INT (p);
2882	  operands[3] = GEN_INT (len);
2883	  return "{depi|depwi} 0,%2,%3,%0";
2884	}
2885    }
2886  else
2887    return "and %1,%2,%0";
2888}
2889
2890/* Return a string to perform a bitwise-and of operands[1] with operands[2]
2891   storing the result in operands[0].  */
2892const char *
2893output_64bit_and (rtx *operands)
2894{
2895  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2896    {
2897      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2898      int ls0, ls1, ms0, p, len;
2899
2900      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2901	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2902	  break;
2903
2904      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2905	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2906	  break;
2907
2908      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2909	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2910	  break;
2911
2912      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2913
2914      if (ls1 == HOST_BITS_PER_WIDE_INT)
2915	{
2916	  len = ls0;
2917
2918	  gcc_assert (len);
2919
2920	  operands[2] = GEN_INT (len);
2921	  return "extrd,u %1,63,%2,%0";
2922	}
2923      else
2924	{
2925	  /* We could use this `depdi' for the case above as well, but it
2926	     requires one more register file access than an `extrd'.  */
2927
2928	  p = 63 - ls0;
2929	  len = ls1 - ls0;
2930
2931	  operands[2] = GEN_INT (p);
2932	  operands[3] = GEN_INT (len);
2933	  return "depdi 0,%2,%3,%0";
2934	}
2935    }
2936  else
2937    return "and %1,%2,%0";
2938}
2939
2940const char *
2941output_ior (rtx *operands)
2942{
2943  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2944  int bs0, bs1, p, len;
2945
2946  if (INTVAL (operands[2]) == 0)
2947    return "copy %1,%0";
2948
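  /* MASK must be a single contiguous run of ones (the assertion below
     checks that no bit above the run is set); "depi -1" then sets
     exactly those bits.  */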
2949  for (bs0 = 0; bs0 < 32; bs0++)
2950    if ((mask & (1 << bs0)) != 0)
2951      break;
2952
2953  for (bs1 = bs0; bs1 < 32; bs1++)
2954    if ((mask & (1 << bs1)) == 0)
2955      break;
2956
2957  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2958
2959  p = 31 - bs0;
2960  len = bs1 - bs0;
2961
2962  operands[2] = GEN_INT (p);
2963  operands[3] = GEN_INT (len);
2964  return "{depi|depwi} -1,%2,%3,%0";
2965}
2966
2967/* Return a string to perform a bitwise inclusive-or of operands[1]
2968   with operands[2], storing the result in operands[0].  */
2969const char *
2970output_64bit_ior (rtx *operands)
2971{
2972  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2973  int bs0, bs1, p, len;
2974
2975  if (INTVAL (operands[2]) == 0)
2976    return "copy %1,%0";
2977
2978  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2979    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2980      break;
2981
2982  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2983    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2984      break;
2985
2986  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
2987	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2988
2989  p = 63 - bs0;
2990  len = bs1 - bs0;
2991
2992  operands[2] = GEN_INT (p);
2993  operands[3] = GEN_INT (len);
2994  return "depdi -1,%2,%3,%0";
2995}
2996
2997/* Target hook for assembling integer objects.  This code handles
2998   aligned SI and DI integers specially since function references
2999   must be preceded by P%.  */
3000
3001static bool
3002pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3003{
3004  if (size == UNITS_PER_WORD
3005      && aligned_p
3006      && function_label_operand (x, VOIDmode))
3007    {
3008      fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3009      output_addr_const (asm_out_file, x);
3010      fputc ('\n', asm_out_file);
3011      return true;
3012    }
3013  return default_assemble_integer (x, size, aligned_p);
3014}
3015
3016/* Output an ascii string.  */
3017void
3018output_ascii (FILE *file, const char *p, int size)
3019{
3020  int i;
3021  int chars_output;
3022  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3023
3024  /* The HP assembler can only take strings of 256 characters at one
3025     time.  This is a limitation on input line length, *not* the
3026     length of the string.  Sigh.  Even worse, it seems that the
3027     restriction is in number of input characters (see \xnn &
3028     \whatever).  So we have to do this very carefully.  */
3029
3030  fputs ("\t.STRING \"", file);
3031
3032  chars_output = 0;
3033  for (i = 0; i < size; i += 4)
3034    {
3035      int co = 0;
3036      int io = 0;
3037      for (io = 0, co = 0; io < MIN (4, size - i); io++)
3038	{
3039	  register unsigned int c = (unsigned char) p[i + io];
3040
3041	  if (c == '\"' || c == '\\')
3042	    partial_output[co++] = '\\';
3043	  if (c >= ' ' && c < 0177)
3044	    partial_output[co++] = c;
3045	  else
3046	    {
3047	      unsigned int hexd;
3048	      partial_output[co++] = '\\';
3049	      partial_output[co++] = 'x';
3050	      hexd = c / 16 + '0';
3051	      if (hexd > '9')
3052		hexd -= '9' - 'a' + 1;
3053	      partial_output[co++] = hexd;
3054	      hexd = c % 16 + '0';
3055	      if (hexd > '9')
3056		hexd -= '9' - 'a' + 1;
3057	      partial_output[co++] = hexd;
3058	    }
3059	}
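      /* CO is at most 16 here (four input chars, each expanding to at
	 most four output chars); the 243 cap leaves room for the
	 .STRING directive and quotes within the 256-character limit.  */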
3060      if (chars_output + co > 243)
3061	{
3062	  fputs ("\"\n\t.STRING \"", file);
3063	  chars_output = 0;
3064	}
3065      fwrite (partial_output, 1, (size_t) co, file);
3066      chars_output += co;
3067      co = 0;
3068    }
3069  fputs ("\"\n", file);
3070}
3071
3072/* Try to rewrite floating point comparisons & branches to avoid
3073   useless add,tr insns.
3074
3075   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3076   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3077   first attempt to remove useless add,tr insns.  It is zero
3078   for the second pass as reorg sometimes leaves bogus REG_DEAD
3079   notes lying around.
3080
3081   When CHECK_NOTES is zero we can only eliminate add,tr insns
3082   when there's a 1:1 correspondence between fcmp and ftest/fbranch
3083   instructions.  */
3084static void
3085remove_useless_addtr_insns (int check_notes)
3086{
3087  rtx insn;
3088  static int pass = 0;
3089
3090  /* This is fairly cheap, so always run it when optimizing.  */
3091  if (optimize > 0)
3092    {
3093      int fcmp_count = 0;
3094      int fbranch_count = 0;
3095
3096      /* Walk all the insns in this function looking for fcmp & fbranch
3097	 instructions.  Keep track of how many of each we find.  */
3098      for (insn = get_insns (); insn; insn = next_insn (insn))
3099	{
3100	  rtx tmp;
3101
3102	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3103	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3104	    continue;
3105
3106	  tmp = PATTERN (insn);
3107
3108	  /* It must be a set.  */
3109	  if (GET_CODE (tmp) != SET)
3110	    continue;
3111
3112	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3113	  tmp = SET_DEST (tmp);
3114	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3115	    {
3116	      fcmp_count++;
3117	      continue;
3118	    }
3119
3120	  tmp = PATTERN (insn);
3121	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3122	  if (GET_CODE (tmp) == SET
3123	      && SET_DEST (tmp) == pc_rtx
3124	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3125	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3126	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3127	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3128	    {
3129	      fbranch_count++;
3130	      continue;
3131	    }
3132	}
3133
3135      /* Find all floating point compare + branch insns.  If possible,
3136	 reverse the comparison & the branch to avoid add,tr insns.  */
3137      for (insn = get_insns (); insn; insn = next_insn (insn))
3138	{
3139	  rtx tmp, next;
3140
3141	  /* Ignore anything that isn't an INSN.  */
3142	  if (GET_CODE (insn) != INSN)
3143	    continue;
3144
3145	  tmp = PATTERN (insn);
3146
3147	  /* It must be a set.  */
3148	  if (GET_CODE (tmp) != SET)
3149	    continue;
3150
3151	  /* The destination must be CCFP, which is register zero.  */
3152	  tmp = SET_DEST (tmp);
3153	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3154	    continue;
3155
3156	  /* INSN should be a set of CCFP.
3157
3158	     See if the result of this insn is used in a reversed FP
3159	     conditional branch.  If so, reverse our condition and
3160	     the branch.  Doing so avoids useless add,tr insns.  */
3161	  next = next_insn (insn);
3162	  while (next)
3163	    {
3164	      /* Jumps, calls and labels stop our search.  */
3165	      if (GET_CODE (next) == JUMP_INSN
3166		  || GET_CODE (next) == CALL_INSN
3167		  || GET_CODE (next) == CODE_LABEL)
3168		break;
3169
3170	      /* As does another fcmp insn.  */
3171	      if (GET_CODE (next) == INSN
3172		  && GET_CODE (PATTERN (next)) == SET
3173		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3174		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3175		break;
3176
3177	      next = next_insn (next);
3178	    }
3179
3180	  /* Is NEXT_INSN a branch?  */
3181	  if (next
3182	      && GET_CODE (next) == JUMP_INSN)
3183	    {
3184	      rtx pattern = PATTERN (next);
3185
3186	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3187		 and CCFP dies, then reverse our conditional and the branch
3188		 to avoid the add,tr.  */
3189	      if (GET_CODE (pattern) == SET
3190		  && SET_DEST (pattern) == pc_rtx
3191		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3192		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3193		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3194		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3195		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3196		  && (fcmp_count == fbranch_count
3197		      || (check_notes
3198			  && find_regno_note (next, REG_DEAD, 0))))
3199		{
3200		  /* Reverse the branch.  */
3201		  tmp = XEXP (SET_SRC (pattern), 1);
3202		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3203		  XEXP (SET_SRC (pattern), 2) = tmp;
3204		  INSN_CODE (next) = -1;
3205
3206		  /* Reverse our condition.  */
3207		  tmp = PATTERN (insn);
3208		  PUT_CODE (XEXP (tmp, 1),
3209			    (reverse_condition_maybe_unordered
3210			     (GET_CODE (XEXP (tmp, 1)))));
3211		}
3212	    }
3213	}
3214    }
3215
3216  pass = !pass;
3218}
3219
3220/* You may have trouble believing this, but this is the 32 bit HP-PA
3221   stack layout.  Wow.
3222
3223   Offset		Contents
3224
3225   Variable arguments	(optional; any number may be allocated)
3226
3227   SP-(4*(N+9))		arg word N
3228   	:		    :
3229      SP-56		arg word 5
3230      SP-52		arg word 4
3231
3232   Fixed arguments	(must be allocated; may remain unused)
3233
3234      SP-48		arg word 3
3235      SP-44		arg word 2
3236      SP-40		arg word 1
3237      SP-36		arg word 0
3238
3239   Frame Marker
3240
3241      SP-32		External Data Pointer (DP)
3242      SP-28		External sr4
3243      SP-24		External/stub RP (RP')
3244      SP-20		Current RP
3245      SP-16		Static Link
3246      SP-12		Clean up
3247      SP-8		Calling Stub RP (RP'')
3248      SP-4		Previous SP
3249
3250   Top of Frame
3251
3252      SP-0		Stack Pointer (points to next available address)
3253
3254*/
3255
3256/* This function saves registers as follows.  Registers marked with ' are
3257   this function's registers (as opposed to the previous function's).
3258   If a frame_pointer isn't needed, r4 is saved as a general register;
3259   the space for the frame pointer is still allocated, though, to keep
3260   things simple.
3261
3262
3263   Top of Frame
3264
3265       SP (FP')		Previous FP
3266       SP + 4		Alignment filler (sigh)
3267       SP + 8		Space for locals reserved here.
3268       .
3269       .
3270       .
3271       SP + n		All call saved registers used.
3272       .
3273       .
3274       .
3275       SP + o		All call saved fp registers used.
3276       .
3277       .
3278       .
3279       SP + p (SP')	points to next available address.
3280
3281*/
3282
3283/* Global variables set by output_function_prologue().  */
3284/* Size of frame.  Need to know this to emit return insns from
3285   leaf procedures.  */
3286static HOST_WIDE_INT actual_fsize, local_fsize;
3287static int save_fregs;
3288
3289/* Emit RTL to store REG at the memory location specified by BASE+DISP.
3290   Handle case where DISP > 8k by using the add_high_const patterns.
3291
3292   Note that in the DISP > 8k case, we leave the high part of the address
3293   in %r1.  Code in expand_hppa_{prologue,epilogue} knows this.  */
3294
3295static void
3296store_reg (int reg, HOST_WIDE_INT disp, int base)
3297{
3298  rtx insn, dest, src, basereg;
3299
3300  src = gen_rtx_REG (word_mode, reg);
3301  basereg = gen_rtx_REG (Pmode, base);
3302  if (VAL_14_BITS_P (disp))
3303    {
3304      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3305      insn = emit_move_insn (dest, src);
3306    }
3307  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3308    {
3309      rtx delta = GEN_INT (disp);
3310      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3311
3312      emit_move_insn (tmpreg, delta);
3313      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3314      if (DO_FRAME_NOTES)
3315	{
3316	  REG_NOTES (insn)
3317	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3318		gen_rtx_SET (VOIDmode, tmpreg,
3319			     gen_rtx_PLUS (Pmode, basereg, delta)),
3320                REG_NOTES (insn));
3321	  RTX_FRAME_RELATED_P (insn) = 1;
3322	}
3323      dest = gen_rtx_MEM (word_mode, tmpreg);
3324      insn = emit_move_insn (dest, src);
3325    }
3326  else
3327    {
3328      rtx delta = GEN_INT (disp);
3329      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3330      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3331
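      /* The PLUS of BASEREG and the HIGH part matches the
	 add_high_const (addil) pattern; the LO_SUM below folds the
	 low-order bits into the store's displacement, leaving the high
	 part in %r1 as noted above.  */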
3332      emit_move_insn (tmpreg, high);
3333      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3334      insn = emit_move_insn (dest, src);
3335      if (DO_FRAME_NOTES)
3336	{
3337	  REG_NOTES (insn)
3338	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3339		gen_rtx_SET (VOIDmode,
3340			     gen_rtx_MEM (word_mode,
3341					  gen_rtx_PLUS (word_mode, basereg,
3342							delta)),
3343                             src),
3344                REG_NOTES (insn));
3345	}
3346    }
3347
3348  if (DO_FRAME_NOTES)
3349    RTX_FRAME_RELATED_P (insn) = 1;
3350}
3351
3352/* Emit RTL to store REG at the memory location specified by BASE and then
3353   add MOD to BASE.  MOD must be <= 8k.  */
3354
3355static void
3356store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3357{
3358  rtx insn, basereg, srcreg, delta;
3359
3360  gcc_assert (VAL_14_BITS_P (mod));
3361
3362  basereg = gen_rtx_REG (Pmode, base);
3363  srcreg = gen_rtx_REG (word_mode, reg);
3364  delta = GEN_INT (mod);
3365
3366  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3367  if (DO_FRAME_NOTES)
3368    {
3369      RTX_FRAME_RELATED_P (insn) = 1;
3370
3371      /* RTX_FRAME_RELATED_P must be set on each frame related set
3372	 in a parallel with more than one element.  */
3373      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3374      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3375    }
3376}
3377
3378/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3379   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3380   whether to add a frame note or not.
3381
3382   In the DISP > 8k case, we leave the high part of the address in %r1.
3383   There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3384
3385static void
3386set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3387{
3388  rtx insn;
3389
3390  if (VAL_14_BITS_P (disp))
3391    {
3392      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3393			     plus_constant (gen_rtx_REG (Pmode, base), disp));
3394    }
3395  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3396    {
3397      rtx basereg = gen_rtx_REG (Pmode, base);
3398      rtx delta = GEN_INT (disp);
3399      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3400
3401      emit_move_insn (tmpreg, delta);
3402      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3403			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3404      if (DO_FRAME_NOTES)
3405	REG_NOTES (insn)
3406	  = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3407	      gen_rtx_SET (VOIDmode, tmpreg,
3408			   gen_rtx_PLUS (Pmode, basereg, delta)),
3409	      REG_NOTES (insn));
3410    }
3411  else
3412    {
3413      rtx basereg = gen_rtx_REG (Pmode, base);
3414      rtx delta = GEN_INT (disp);
3415      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3416
3417      emit_move_insn (tmpreg,
3418		      gen_rtx_PLUS (Pmode, basereg,
3419				    gen_rtx_HIGH (Pmode, delta)));
3420      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3421			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3422    }
3423
3424  if (DO_FRAME_NOTES && note)
3425    RTX_FRAME_RELATED_P (insn) = 1;
3426}
3427
3428HOST_WIDE_INT
3429compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3430{
3431  int freg_saved = 0;
3432  int i, j;
3433
3434  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3435     be consistent with the rounding and size calculation done here.
3436     Change them at the same time.  */
3437
3438  /* We do our own stack alignment.  First, round the size of the
3439     stack locals up to a word boundary.  */
3440  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3441
3442  /* Space for previous frame pointer + filler.  If any frame is
3443     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3444     waste some space here for the sake of HP compatibility.  The
3445     first slot is only used when the frame pointer is needed.  */
3446  if (size || frame_pointer_needed)
3447    size += STARTING_FRAME_OFFSET;
3448
3449  /* If the current function calls __builtin_eh_return, then we need
3450     to allocate stack space for registers that will hold data for
3451     the exception handler.  */
3452  if (DO_FRAME_NOTES && current_function_calls_eh_return)
3453    {
3454      unsigned int i;
3455
3456      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3457	continue;
3458      size += i * UNITS_PER_WORD;
3459    }
3460
3461  /* Account for space used by the callee general register saves.  */
3462  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3463    if (regs_ever_live[i])
3464      size += UNITS_PER_WORD;
3465
3466  /* Account for space used by the callee floating point register saves.  */
3467  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3468    if (regs_ever_live[i]
3469	|| (!TARGET_64BIT && regs_ever_live[i + 1]))
3470      {
3471	freg_saved = 1;
3472
3473	/* We always save both halves of the FP register, so always
3474	   increment the frame size by 8 bytes.  */
3475	size += 8;
3476      }
3477
3478  /* If any of the floating registers are saved, account for the
3479     alignment needed for the floating point register save block.  */
3480  if (freg_saved)
3481    {
3482      size = (size + 7) & ~7;
3483      if (fregs_live)
3484	*fregs_live = 1;
3485    }
3486
3487  /* The various ABIs include space for the outgoing parameters in the
3488     size of the current function's stack frame.  We don't need to align
3489     for the outgoing arguments as their alignment is set by the final
3490     rounding for the frame as a whole.  */
3491  size += current_function_outgoing_args_size;
3492
3493  /* Allocate space for the fixed frame marker.  This space must be
3494     allocated for any function that makes calls or allocates
3495     stack space.  */
3496  if (!current_function_is_leaf || size)
3497    size += TARGET_64BIT ? 48 : 32;
3498
3499  /* Finally, round to the preferred stack boundary.  */
3500  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3501	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3502}
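
/* A worked example of the computation above, assuming a typical 32-bit
   configuration where STARTING_FRAME_OFFSET is 8 and
   PREFERRED_STACK_BOUNDARY is 512 bits (the precise constants are
   target macros): 100 bytes of locals round up to 100 and the frame
   marker slot brings this to 108; if, say, %r4-%r6 are live, the
   callee GR saves add 12 (120); 16 bytes of outgoing arguments give
   136; the 32-byte fixed frame marker for a non-leaf function gives
   168; and the final 64-byte rounding yields a 192-byte frame.  */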
3503
3504/* Generate the assembly code for function entry.  FILE is a stdio
3505   stream to output the code to.  SIZE is an int: how many units of
3506   temporary storage to allocate.
3507
3508   Refer to the array `regs_ever_live' to determine which registers to
3509   save; `regs_ever_live[I]' is nonzero if register number I is ever
3510   used in the function.  This function is responsible for knowing
3511   which registers should not be saved even if used.  */
3512
3513/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3514   of memory.  If any fpu reg is used in the function, we allocate
3515   such a block here, at the bottom of the frame, just in case it's needed.
3516
3517   If this function is a leaf procedure, then we may choose not
3518   to do a "save" insn.  The decision about whether or not
3519   to do this is made in regclass.c.  */
3520
3521static void
3522pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3523{
3524  /* The function's label and associated .PROC must never be
3525     separated and must be output *after* any profiling declarations
3526     to avoid changing spaces/subspaces within a procedure.  */
3527  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3528  fputs ("\t.PROC\n", file);
3529
3530  /* hppa_expand_prologue does the dirty work now.  We just need
3531     to output the assembler directives which denote the start
3532     of a function.  */
3533  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3534  if (regs_ever_live[2])
3535    fputs (",CALLS,SAVE_RP", file);
3536  else
3537    fputs (",NO_CALLS", file);
3538
3539  /* The SAVE_SP flag is used to indicate that register %r3 is stored
3540     at the beginning of the frame and that it is used as the frame
3541     pointer for the frame.  We do this because our current frame
3542     layout doesn't conform to that specified in the HP runtime
3543     documentation and we need a way to indicate to programs such as
3544     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3545     isn't used by HP compilers but is supported by the assembler.
3546     However, SAVE_SP is supposed to indicate that the previous stack
3547     pointer has been saved in the frame marker.  */
3548  if (frame_pointer_needed)
3549    fputs (",SAVE_SP", file);
3550
3551  /* Pass on information about the number of callee register saves
3552     performed in the prologue.
3553
3554     The compiler is supposed to pass the highest register number
3555     saved, the assembler then has to adjust that number before
3556     entering it into the unwind descriptor (to account for any
3557     caller saved registers with lower register numbers than the
3558     first callee saved register).  */
3559  if (gr_saved)
3560    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3561
3562  if (fr_saved)
3563    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3564
3565  fputs ("\n\t.ENTRY\n", file);
3566
3567  remove_useless_addtr_insns (0);
3568}
3569
3570void
3571hppa_expand_prologue (void)
3572{
3573  int merge_sp_adjust_with_store = 0;
3574  HOST_WIDE_INT size = get_frame_size ();
3575  HOST_WIDE_INT offset;
3576  int i;
3577  rtx insn, tmpreg;
3578
3579  gr_saved = 0;
3580  fr_saved = 0;
3581  save_fregs = 0;
3582
  /* Compute the total size for the frame pointer, filler and locals,
     rounded to the next word boundary.  Similar code appears in
     compute_frame_size and must be changed in tandem with this code.  */
3586  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3587  if (local_fsize || frame_pointer_needed)
3588    local_fsize += STARTING_FRAME_OFFSET;
3589
3590  actual_fsize = compute_frame_size (size, &save_fregs);
3591
3592  /* Compute a few things we will use often.  */
3593  tmpreg = gen_rtx_REG (word_mode, 1);
3594
3595  /* Save RP first.  The calling conventions manual states RP will
3596     always be stored into the caller's frame at sp - 20 or sp - 16
3597     depending on which ABI is in use.  */
3598  if (regs_ever_live[2] || current_function_calls_eh_return)
3599    store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3600
3601  /* Allocate the local frame and set up the frame pointer if needed.  */
3602  if (actual_fsize != 0)
3603    {
3604      if (frame_pointer_needed)
3605	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  Two versions: the first
	     handles small (<8k) frames, the second handles large (>=8k)
	     frames.  */
3612	  insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3613	  if (DO_FRAME_NOTES)
3614	    RTX_FRAME_RELATED_P (insn) = 1;
3615
3616	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3617	  if (DO_FRAME_NOTES)
3618	    RTX_FRAME_RELATED_P (insn) = 1;
3619
3620	  if (VAL_14_BITS_P (actual_fsize))
3621	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3622	  else
3623	    {
3624	      /* It is incorrect to store the saved frame pointer at *sp,
3625		 then increment sp (writes beyond the current stack boundary).
3626
3627		 So instead use stwm to store at *sp and post-increment the
3628		 stack pointer as an atomic operation.  Then increment sp to
3629		 finish allocating the new frame.  */
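	      /* Illustrative sketch of the typical 32-bit output (not
		 the literal emitted text) for actual_fsize == 20000:

		   stwm %r1,8128(%sp)	; save old fp, bump %sp by 8128
		   addil L'11872,%sp	; remaining 20000 - 8128 bytes
		   ldo R'11872(%r1),%sp  */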
3630	      HOST_WIDE_INT adjust1 = 8192 - 64;
3631	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3632
3633	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3634	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3635			      adjust2, 1);
3636	    }
3637
3638	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3639	     we need to store the previous stack pointer (frame pointer)
3640	     into the frame marker on targets that use the HP unwind
3641	     library.  This allows the HP unwind library to be used to
3642	     unwind GCC frames.  However, we are not fully compatible
3643	     with the HP library because our frame layout differs from
3644	     that specified in the HP runtime specification.
3645
3646	     We don't want a frame note on this instruction as the frame
3647	     marker moves during dynamic stack allocation.
3648
3649	     This instruction also serves as a blockage to prevent
3650	     register spills from being scheduled before the stack
3651	     pointer is raised.  This is necessary as we store
3652	     registers using the frame pointer as a base register,
3653	     and the frame pointer is set before sp is raised.  */
3654	  if (TARGET_HPUX_UNWIND_LIBRARY)
3655	    {
3656	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3657				       GEN_INT (TARGET_64BIT ? -8 : -4));
3658
3659	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3660			      frame_pointer_rtx);
3661	    }
3662	  else
3663	    emit_insn (gen_blockage ());
3664	}
3665      /* no frame pointer needed.  */
3666      else
3667	{
	  /* In some cases we can perform the first callee register save
	     and allocate the stack frame at the same time.  If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
3672	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3673	    merge_sp_adjust_with_store = 1;
	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
3676	  else
3677	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3678			    actual_fsize, 1);
3679	}
3680    }
3681
3682  /* Normal register save.
3683
3684     Do not save the frame pointer in the frame_pointer_needed case.  It
3685     was done earlier.  */
3686  if (frame_pointer_needed)
3687    {
3688      offset = local_fsize;
3689
3690      /* Saving the EH return data registers in the frame is the simplest
3691	 way to get the frame unwind information emitted.  We put them
3692	 just before the general registers.  */
3693      if (DO_FRAME_NOTES && current_function_calls_eh_return)
3694	{
3695	  unsigned int i, regno;
3696
3697	  for (i = 0; ; ++i)
3698	    {
3699	      regno = EH_RETURN_DATA_REGNO (i);
3700	      if (regno == INVALID_REGNUM)
3701		break;
3702
3703	      store_reg (regno, offset, FRAME_POINTER_REGNUM);
3704	      offset += UNITS_PER_WORD;
3705	    }
3706	}
3707
3708      for (i = 18; i >= 4; i--)
3709	if (regs_ever_live[i] && ! call_used_regs[i])
3710	  {
3711	    store_reg (i, offset, FRAME_POINTER_REGNUM);
3712	    offset += UNITS_PER_WORD;
3713	    gr_saved++;
3714	  }
3715      /* Account for %r3 which is saved in a special place.  */
3716      gr_saved++;
3717    }
3718  /* No frame pointer needed.  */
3719  else
3720    {
3721      offset = local_fsize - actual_fsize;
3722
3723      /* Saving the EH return data registers in the frame is the simplest
3724         way to get the frame unwind information emitted.  */
3725      if (DO_FRAME_NOTES && current_function_calls_eh_return)
3726	{
3727	  unsigned int i, regno;
3728
3729	  for (i = 0; ; ++i)
3730	    {
3731	      regno = EH_RETURN_DATA_REGNO (i);
3732	      if (regno == INVALID_REGNUM)
3733		break;
3734
3735	      /* If merge_sp_adjust_with_store is nonzero, then we can
3736		 optimize the first save.  */
3737	      if (merge_sp_adjust_with_store)
3738		{
3739		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3740		  merge_sp_adjust_with_store = 0;
3741		}
3742	      else
3743		store_reg (regno, offset, STACK_POINTER_REGNUM);
3744	      offset += UNITS_PER_WORD;
3745	    }
3746	}
3747
3748      for (i = 18; i >= 3; i--)
3749      	if (regs_ever_live[i] && ! call_used_regs[i])
3750	  {
3751	    /* If merge_sp_adjust_with_store is nonzero, then we can
3752	       optimize the first GR save.  */
3753	    if (merge_sp_adjust_with_store)
3754	      {
3755		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3756		merge_sp_adjust_with_store = 0;
3757	      }
3758	    else
3759	      store_reg (i, offset, STACK_POINTER_REGNUM);
3760	    offset += UNITS_PER_WORD;
3761	    gr_saved++;
3762	  }
3763
3764      /* If we wanted to merge the SP adjustment with a GR save, but we never
3765	 did any GR saves, then just emit the adjustment here.  */
3766      if (merge_sp_adjust_with_store)
3767	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3768			actual_fsize, 1);
3769    }
3770
3771  /* The hppa calling conventions say that %r19, the pic offset
3772     register, is saved at sp - 32 (in this function's frame)
3773     when generating PIC code.  FIXME:  What is the correct thing
3774     to do for functions which make no calls and allocate no
3775     frame?  Do we need to allocate a frame, or can we just omit
3776     the save?   For now we'll just omit the save.
3777
3778     We don't want a note on this insn as the frame marker can
3779     move if there is a dynamic stack allocation.  */
3780  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3781    {
3782      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3783
      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }
3787
3788  /* Align pointer properly (doubleword boundary).  */
3789  offset = (offset + 7) & ~7;
3790
3791  /* Floating point register store.  */
3792  if (save_fregs)
3793    {
3794      rtx base;
3795
3796      /* First get the frame or stack pointer to the start of the FP register
3797	 save area.  */
3798      if (frame_pointer_needed)
3799	{
3800	  set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3801	  base = frame_pointer_rtx;
3802	}
3803      else
3804	{
3805	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3806	  base = stack_pointer_rtx;
3807	}
3808
3809      /* Now actually save the FP registers.  */
3810      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3811	{
3812	  if (regs_ever_live[i]
3813	      || (! TARGET_64BIT && regs_ever_live[i + 1]))
3814	    {
3815	      rtx addr, insn, reg;
3816	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3817	      reg = gen_rtx_REG (DFmode, i);
3818	      insn = emit_move_insn (addr, reg);
3819	      if (DO_FRAME_NOTES)
3820		{
3821		  RTX_FRAME_RELATED_P (insn) = 1;
3822		  if (TARGET_64BIT)
3823		    {
3824		      rtx mem = gen_rtx_MEM (DFmode,
3825					     plus_constant (base, offset));
3826		      REG_NOTES (insn)
3827			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3828					     gen_rtx_SET (VOIDmode, mem, reg),
3829					     REG_NOTES (insn));
3830		    }
3831		  else
3832		    {
3833		      rtx meml = gen_rtx_MEM (SFmode,
3834					      plus_constant (base, offset));
3835		      rtx memr = gen_rtx_MEM (SFmode,
3836					      plus_constant (base, offset + 4));
3837		      rtx regl = gen_rtx_REG (SFmode, i);
3838		      rtx regr = gen_rtx_REG (SFmode, i + 1);
3839		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3840		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3841		      rtvec vec;
3842
3843		      RTX_FRAME_RELATED_P (setl) = 1;
3844		      RTX_FRAME_RELATED_P (setr) = 1;
3845		      vec = gen_rtvec (2, setl, setr);
3846		      REG_NOTES (insn)
3847			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3848					     gen_rtx_SEQUENCE (VOIDmode, vec),
3849					     REG_NOTES (insn));
3850		    }
3851		}
3852	      offset += GET_MODE_SIZE (DFmode);
3853	      fr_saved++;
3854	    }
3855	}
3856    }
3857}
3858
3859/* Emit RTL to load REG from the memory location specified by BASE+DISP.
3860   Handle case where DISP > 8k by using the add_high_const patterns.  */
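
/* For instance (an illustrative sketch), on a 32-bit target

     load_reg (4, 16384, STACK_POINTER_REGNUM);

   would typically expand to

     addil L'16384,%sp
     ldw R'16384(%r1),%r4

   using %r1 as the scratch register for the high part.  */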
3861
3862static void
3863load_reg (int reg, HOST_WIDE_INT disp, int base)
3864{
3865  rtx dest = gen_rtx_REG (word_mode, reg);
3866  rtx basereg = gen_rtx_REG (Pmode, base);
3867  rtx src;
3868
3869  if (VAL_14_BITS_P (disp))
3870    src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3871  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3872    {
3873      rtx delta = GEN_INT (disp);
3874      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3875
3876      emit_move_insn (tmpreg, delta);
3877      if (TARGET_DISABLE_INDEXING)
3878	{
3879	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3880	  src = gen_rtx_MEM (word_mode, tmpreg);
3881	}
3882      else
3883	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3884    }
3885  else
3886    {
3887      rtx delta = GEN_INT (disp);
3888      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3889      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3890
3891      emit_move_insn (tmpreg, high);
3892      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3893    }
3894
3895  emit_move_insn (dest, src);
3896}
3897
3898/* Update the total code bytes output to the text section.  */
3899
3900static void
3901update_total_code_bytes (int nbytes)
3902{
3903  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3904      && !IN_NAMED_SECTION_P (cfun->decl))
3905    {
3906      if (INSN_ADDRESSES_SET_P ())
3907	{
3908	  unsigned long old_total = total_code_bytes;
3909
3910	  total_code_bytes += nbytes;
3911
3912	  /* Be prepared to handle overflows.  */
3913	  if (old_total > total_code_bytes)
3914	    total_code_bytes = -1;
3915	}
3916      else
3917	total_code_bytes = -1;
3918    }
3919}
3920
3921/* This function generates the assembly code for function exit.
3922   Args are as for output_function_prologue ().
3923
3924   The function epilogue should not depend on the current stack
3925   pointer!  It should use the frame pointer only.  This is mandatory
3926   because of alloca; we also take advantage of it to omit stack
3927   adjustments before returning.  */
3928
3929static void
3930pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3931{
3932  rtx insn = get_last_insn ();
3933
3934  last_address = 0;
3935
3936  /* hppa_expand_epilogue does the dirty work now.  We just need
3937     to output the assembler directives which denote the end
3938     of a function.
3939
3940     To make debuggers happy, emit a nop if the epilogue was completely
3941     eliminated due to a volatile call as the last insn in the
3942     current function.  That way the return address (in %r2) will
3943     always point to a valid instruction in the current function.  */
3944
3945  /* Get the last real insn.  */
3946  if (GET_CODE (insn) == NOTE)
3947    insn = prev_real_insn (insn);
3948
3949  /* If it is a sequence, then look inside.  */
3950  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3951    insn = XVECEXP (PATTERN (insn), 0, 0);
3952
3953  /* If insn is a CALL_INSN, then it must be a call to a volatile
3954     function (otherwise there would be epilogue insns).  */
3955  if (insn && GET_CODE (insn) == CALL_INSN)
3956    {
3957      fputs ("\tnop\n", file);
3958      last_address += 4;
3959    }
3960
3961  fputs ("\t.EXIT\n\t.PROCEND\n", file);
3962
3963  if (TARGET_SOM && TARGET_GAS)
3964    {
      /* We're done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
3968      forget_section ();
3969    }
3970
3971  if (INSN_ADDRESSES_SET_P ())
3972    {
3973      insn = get_last_nonnote_insn ();
3974      last_address += INSN_ADDRESSES (INSN_UID (insn));
3975      if (INSN_P (insn))
3976	last_address += insn_default_length (insn);
3977      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
3978		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
3979    }
3980
3981  /* Finally, update the total number of code bytes output so far.  */
3982  update_total_code_bytes (last_address);
3983}
3984
3985void
3986hppa_expand_epilogue (void)
3987{
3988  rtx tmpreg;
3989  HOST_WIDE_INT offset;
3990  HOST_WIDE_INT ret_off = 0;
3991  int i;
3992  int merge_sp_adjust_with_load = 0;
3993
3994  /* We will use this often.  */
3995  tmpreg = gen_rtx_REG (word_mode, 1);
3996
3997  /* Try to restore RP early to avoid load/use interlocks when
3998     RP gets used in the return (bv) instruction.  This appears to still
3999     be necessary even when we schedule the prologue and epilogue.  */
  if (regs_ever_live[2] || current_function_calls_eh_return)
4001    {
4002      ret_off = TARGET_64BIT ? -16 : -20;
4003      if (frame_pointer_needed)
4004	{
4005	  load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4006	  ret_off = 0;
4007	}
4008      else
4009	{
4010	  /* No frame pointer, and stack is smaller than 8k.  */
4011	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4012	    {
4013	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4014	      ret_off = 0;
4015	    }
4016	}
4017    }
4018
4019  /* General register restores.  */
4020  if (frame_pointer_needed)
4021    {
4022      offset = local_fsize;
4023
4024      /* If the current function calls __builtin_eh_return, then we need
4025         to restore the saved EH data registers.  */
4026      if (DO_FRAME_NOTES && current_function_calls_eh_return)
4027	{
4028	  unsigned int i, regno;
4029
4030	  for (i = 0; ; ++i)
4031	    {
4032	      regno = EH_RETURN_DATA_REGNO (i);
4033	      if (regno == INVALID_REGNUM)
4034		break;
4035
4036	      load_reg (regno, offset, FRAME_POINTER_REGNUM);
4037	      offset += UNITS_PER_WORD;
4038	    }
4039	}
4040
4041      for (i = 18; i >= 4; i--)
4042	if (regs_ever_live[i] && ! call_used_regs[i])
4043	  {
4044	    load_reg (i, offset, FRAME_POINTER_REGNUM);
4045	    offset += UNITS_PER_WORD;
4046	  }
4047    }
4048  else
4049    {
4050      offset = local_fsize - actual_fsize;
4051
4052      /* If the current function calls __builtin_eh_return, then we need
4053         to restore the saved EH data registers.  */
4054      if (DO_FRAME_NOTES && current_function_calls_eh_return)
4055	{
4056	  unsigned int i, regno;
4057
4058	  for (i = 0; ; ++i)
4059	    {
4060	      regno = EH_RETURN_DATA_REGNO (i);
4061	      if (regno == INVALID_REGNUM)
4062		break;
4063
	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register number of
	         the load with which we will merge the sp adjustment.  */
4067	      if (merge_sp_adjust_with_load == 0
4068		  && local_fsize == 0
4069		  && VAL_14_BITS_P (-actual_fsize))
4070	        merge_sp_adjust_with_load = regno;
4071	      else
4072		load_reg (regno, offset, STACK_POINTER_REGNUM);
4073	      offset += UNITS_PER_WORD;
4074	    }
4075	}
4076
4077      for (i = 18; i >= 3; i--)
4078	{
4079	  if (regs_ever_live[i] && ! call_used_regs[i])
4080	    {
	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register number of
	         the load with which we will merge the sp adjustment.  */
4084	      if (merge_sp_adjust_with_load == 0
4085		  && local_fsize == 0
4086		  && VAL_14_BITS_P (-actual_fsize))
4087	        merge_sp_adjust_with_load = i;
4088	      else
4089		load_reg (i, offset, STACK_POINTER_REGNUM);
4090	      offset += UNITS_PER_WORD;
4091	    }
4092	}
4093    }
4094
4095  /* Align pointer properly (doubleword boundary).  */
4096  offset = (offset + 7) & ~7;
4097
4098  /* FP register restores.  */
4099  if (save_fregs)
4100    {
4101      /* Adjust the register to index off of.  */
4102      if (frame_pointer_needed)
4103	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4104      else
4105	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4106
4107      /* Actually do the restores now.  */
4108      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4109	if (regs_ever_live[i]
4110	    || (! TARGET_64BIT && regs_ever_live[i + 1]))
4111	  {
4112	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4113	    rtx dest = gen_rtx_REG (DFmode, i);
4114	    emit_move_insn (dest, src);
4115	  }
4116    }
4117
4118  /* Emit a blockage insn here to keep these insns from being moved to
4119     an earlier spot in the epilogue, or into the main instruction stream.
4120
4121     This is necessary as we must not cut the stack back before all the
4122     restores are finished.  */
4123  emit_insn (gen_blockage ());
4124
4125  /* Reset stack pointer (and possibly frame pointer).  The stack
4126     pointer is initially set to fp + 64 to avoid a race condition.  */
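  /* Sketch of the typical sequence (illustrative only), with %r3 as
     the frame pointer:

       ldo 64(%r3),%sp		; %sp = fp + 64
       ldwm -64(%sp),%r3	; cut %sp back to fp, then reload %r3

     Since the PA stack grows upward, keeping %sp at fp + 64 until the
     final atomic load-and-modify keeps the saved %r3 at *fp below the
     top of stack, so it cannot be clobbered (e.g. by a signal handler)
     before it is restored.  */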
4127  if (frame_pointer_needed)
4128    {
4129      rtx delta = GEN_INT (-64);
4130
4131      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4132      emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4133    }
4134  /* If we were deferring a callee register restore, do it now.  */
4135  else if (merge_sp_adjust_with_load)
4136    {
4137      rtx delta = GEN_INT (-actual_fsize);
4138      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4139
4140      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4141    }
4142  else if (actual_fsize != 0)
4143    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4144		    - actual_fsize, 0);
4145
4146  /* If we haven't restored %r2 yet (no frame pointer, and a stack
4147     frame greater than 8k), do so now.  */
4148  if (ret_off != 0)
4149    load_reg (2, ret_off, STACK_POINTER_REGNUM);
4150
4151  if (DO_FRAME_NOTES && current_function_calls_eh_return)
4152    {
4153      rtx sa = EH_RETURN_STACKADJ_RTX;
4154
4155      emit_insn (gen_blockage ());
4156      emit_insn (TARGET_64BIT
4157		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4158		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4159    }
4160}
4161
4162rtx
4163hppa_pic_save_rtx (void)
4164{
4165  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4166}
4167
4168#ifndef NO_DEFERRED_PROFILE_COUNTERS
4169#define NO_DEFERRED_PROFILE_COUNTERS 0
4170#endif
4171
4172/* Define heap vector type for funcdef numbers.  */
4173DEF_VEC_I(int);
4174DEF_VEC_ALLOC_I(int,heap);
4175
4176/* Vector of funcdef numbers.  */
4177static VEC(int,heap) *funcdef_nos;
4178
4179/* Output deferred profile counters.  */
4180static void
4181output_deferred_profile_counters (void)
4182{
4183  unsigned int i;
4184  int align, n;
4185
4186  if (VEC_empty (int, funcdef_nos))
    return;
4188
4189  data_section ();
4190  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4191  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4192
4193  for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4194    {
4195      targetm.asm_out.internal_label (asm_out_file, "LP", n);
4196      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4197    }
4198
4199  VEC_free (int, heap, funcdef_nos);
4200}
4201
4202void
4203hppa_profile_hook (int label_no)
4204{
4205  /* We use SImode for the address of the function in both 32 and
4206     64-bit code to avoid having to provide DImode versions of the
4207     lcla2 and load_offset_label_address insn patterns.  */
4208  rtx reg = gen_reg_rtx (SImode);
4209  rtx label_rtx = gen_label_rtx ();
4210  rtx begin_label_rtx, call_insn;
4211  char begin_label_name[16];
4212
4213  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4214			       label_no);
4215  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4216
4217  if (TARGET_64BIT)
4218    emit_move_insn (arg_pointer_rtx,
4219		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4220				  GEN_INT (64)));
4221
4222  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4223
  /* The address of the function is loaded into %r25 with an
     instruction-relative sequence that avoids the use of relocations.
     The sequence is split so that the load_offset_label_address
     instruction can occupy the delay slot of the call to _mcount.  */
4228  if (TARGET_PA_20)
4229    emit_insn (gen_lcla2 (reg, label_rtx));
4230  else
4231    emit_insn (gen_lcla1 (reg, label_rtx));
4232
4233  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4234					    reg, begin_label_rtx, label_rtx));
4235
4236#if !NO_DEFERRED_PROFILE_COUNTERS
4237  {
4238    rtx count_label_rtx, addr, r24;
4239    char count_label_name[16];
4240
4241    VEC_safe_push (int, heap, funcdef_nos, label_no);
4242    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4243    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4244
4245    addr = force_reg (Pmode, count_label_rtx);
4246    r24 = gen_rtx_REG (Pmode, 24);
4247    emit_move_insn (r24, addr);
4248
4249    call_insn =
4250      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4251					     gen_rtx_SYMBOL_REF (Pmode,
4252								 "_mcount")),
4253				GEN_INT (TARGET_64BIT ? 24 : 12)));
4254
4255    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4256  }
4257#else
4258
4259  call_insn =
4260    emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4261					   gen_rtx_SYMBOL_REF (Pmode,
4262							       "_mcount")),
4263			      GEN_INT (TARGET_64BIT ? 16 : 8)));
4264
4265#endif
4266
4267  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4268  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4269
4270  /* Indicate the _mcount call cannot throw, nor will it execute a
4271     non-local goto.  */
4272  REG_NOTES (call_insn)
4273    = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4274}
4275
4276/* Fetch the return address for the frame COUNT steps up from
4277   the current frame, after the prologue.  FRAMEADDR is the
4278   frame pointer of the COUNT frame.
4279
4280   We want to ignore any export stub remnants here.  To handle this,
4281   we examine the code at the return address, and if it is an export
4282   stub, we return a memory rtx for the stub return address stored
4283   at frame-24.
4284
4285   The value returned is used in two different ways:
4286
4287	1. To find a function's caller.
4288
4289	2. To change the return address for a function.
4290
4291   This function handles most instances of case 1; however, it will
4292   fail if there are two levels of stubs to execute on the return
4293   path.  The only way I believe that can happen is if the return value
4294   needs a parameter relocation, which never happens for C code.
4295
4296   This function handles most instances of case 2; however, it will
4297   fail if we did not originally have stub code on the return path
4298   but will need stub code on the new return path.  This can happen if
4299   the caller & callee are both in the main program, but the new
4300   return location is in a shared library.  */
4301
4302rtx
4303return_addr_rtx (int count, rtx frameaddr)
4304{
4305  rtx label;
4306  rtx rp;
4307  rtx saved_rp;
4308  rtx ins;
4309
4310  if (count != 0)
4311    return NULL_RTX;
4312
4313  rp = get_hard_reg_initial_val (Pmode, 2);
4314
4315  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4316    return rp;
4317
4318  saved_rp = gen_reg_rtx (Pmode);
4319  emit_move_insn (saved_rp, rp);
4320
4321  /* Get pointer to the instruction stream.  We have to mask out the
4322     privilege level from the two low order bits of the return address
4323     pointer here so that ins will point to the start of the first
4324     instruction that would have been executed if we returned.  */
4325  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4326  label = gen_label_rtx ();
4327
4328  /* Check the instruction stream at the normal return address for the
4329     export stub:
4330
4331	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4332	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4333	0x00011820 | stub+16:  mtsp r1,sr0
4334	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4335
     If it is an export stub, then our return address is really in
     -24[frameaddr].  */
4338
4339  emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4340		 NULL_RTX, SImode, 1);
4341  emit_jump_insn (gen_bne (label));
4342
4343  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4344		 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4345  emit_jump_insn (gen_bne (label));
4346
4347  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4348		 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4349  emit_jump_insn (gen_bne (label));
4350
4351  /* 0xe0400002 must be specified as -532676606 so that it won't be
4352     rejected as an invalid immediate operand on 64-bit hosts.  */
4353  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4354		 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);
4355
4356  /* If there is no export stub then just use the value saved from
4357     the return pointer register.  */
4358
4359  emit_jump_insn (gen_bne (label));
4360
4361  /* Here we know that our return address points to an export
4362     stub.  We don't want to return the address of the export stub,
4363     but rather the return address of the export stub.  That return
4364     address is stored at -24[frameaddr].  */
4365
4366  emit_move_insn (saved_rp,
4367		  gen_rtx_MEM (Pmode,
4368			       memory_address (Pmode,
4369					       plus_constant (frameaddr,
4370							      -24))));
4371
4372  emit_label (label);
4373  return saved_rp;
4374}
4375
4376/* This is only valid once reload has completed because it depends on
4377   knowing exactly how much (if any) frame there is and...
4378
4379   It's only valid if there is no frame marker to de-allocate and...
4380
4381   It's only valid if %r2 hasn't been saved into the caller's frame
4382   (we're not profiling and %r2 isn't live anywhere).  */
4383int
4384hppa_can_use_return_insn_p (void)
4385{
4386  return (reload_completed
4387	  && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4388	  && ! regs_ever_live[2]
4389	  && ! frame_pointer_needed);
4390}
4391
4392void
4393emit_bcond_fp (enum rtx_code code, rtx operand0)
4394{
4395  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4396			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4397						     gen_rtx_fmt_ee (code,
4398							      VOIDmode,
4399							      gen_rtx_REG (CCFPmode, 0),
4400							      const0_rtx),
4401						     gen_rtx_LABEL_REF (VOIDmode, operand0),
4402						     pc_rtx)));
4403
4404}
4405
4406rtx
4407gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4408{
4409  return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4410		      gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4411}
4412
4413/* Adjust the cost of a scheduling dependency.  Return the new cost of
4414   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
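
/* For example (illustrative numbers): if DEP_INSN is an FP add with a
   default latency of 3 and INSN is an fpload whose target register is
   one of the add's sources, the anti-dependency case below returns
   3 - 1 = 2 so that the load issues one cycle before the add
   completes.  */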
4415
4416static int
4417pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4418{
4419  enum attr_type attr_type;
4420
  /* Don't adjust costs for a pa8000 chip; also do not adjust any
     true dependencies, as they are described with bypasses now.  */
4423  if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4424    return cost;
4425
4426  if (! recog_memoized (insn))
4427    return 0;
4428
4429  attr_type = get_attr_type (insn);
4430
4431  switch (REG_NOTE_KIND (link))
4432    {
4433    case REG_DEP_ANTI:
4434      /* Anti dependency; DEP_INSN reads a register that INSN writes some
4435	 cycles later.  */
4436
4437      if (attr_type == TYPE_FPLOAD)
4438	{
4439	  rtx pat = PATTERN (insn);
4440	  rtx dep_pat = PATTERN (dep_insn);
4441	  if (GET_CODE (pat) == PARALLEL)
4442	    {
4443	      /* This happens for the fldXs,mb patterns.  */
4444	      pat = XVECEXP (pat, 0, 0);
4445	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;
4450
4451	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4452	    {
4453	      if (! recog_memoized (dep_insn))
4454		return 0;
4455	      switch (get_attr_type (dep_insn))
4456		{
4457		case TYPE_FPALU:
4458		case TYPE_FPMULSGL:
4459		case TYPE_FPMULDBL:
4460		case TYPE_FPDIVSGL:
4461		case TYPE_FPDIVDBL:
4462		case TYPE_FPSQRTSGL:
4463		case TYPE_FPSQRTDBL:
4464		  /* A fpload can't be issued until one cycle before a
4465		     preceding arithmetic operation has finished if
4466		     the target of the fpload is any of the sources
4467		     (or destination) of the arithmetic operation.  */
4468		  return insn_default_latency (dep_insn) - 1;
4469
4470		default:
4471		  return 0;
4472		}
4473	    }
4474	}
4475      else if (attr_type == TYPE_FPALU)
4476	{
4477	  rtx pat = PATTERN (insn);
4478	  rtx dep_pat = PATTERN (dep_insn);
4479	  if (GET_CODE (pat) == PARALLEL)
4480	    {
4481	      /* This happens for the fldXs,mb patterns.  */
4482	      pat = XVECEXP (pat, 0, 0);
4483	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;
4488
4489	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4490	    {
4491	      if (! recog_memoized (dep_insn))
4492		return 0;
4493	      switch (get_attr_type (dep_insn))
4494		{
4495		case TYPE_FPDIVSGL:
4496		case TYPE_FPDIVDBL:
4497		case TYPE_FPSQRTSGL:
4498		case TYPE_FPSQRTDBL:
4499		  /* An ALU flop can't be issued until two cycles before a
4500		     preceding divide or sqrt operation has finished if
4501		     the target of the ALU flop is any of the sources
4502		     (or destination) of the divide or sqrt operation.  */
4503		  return insn_default_latency (dep_insn) - 2;
4504
4505		default:
4506		  return 0;
4507		}
4508	    }
4509	}
4510
4511      /* For other anti dependencies, the cost is 0.  */
4512      return 0;
4513
4514    case REG_DEP_OUTPUT:
4515      /* Output dependency; DEP_INSN writes a register that INSN writes some
4516	 cycles later.  */
4517      if (attr_type == TYPE_FPLOAD)
4518	{
4519	  rtx pat = PATTERN (insn);
4520	  rtx dep_pat = PATTERN (dep_insn);
4521	  if (GET_CODE (pat) == PARALLEL)
4522	    {
4523	      /* This happens for the fldXs,mb patterns.  */
4524	      pat = XVECEXP (pat, 0, 0);
4525	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;
4530
4531	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4532	    {
4533	      if (! recog_memoized (dep_insn))
4534		return 0;
4535	      switch (get_attr_type (dep_insn))
4536		{
4537		case TYPE_FPALU:
4538		case TYPE_FPMULSGL:
4539		case TYPE_FPMULDBL:
4540		case TYPE_FPDIVSGL:
4541		case TYPE_FPDIVDBL:
4542		case TYPE_FPSQRTSGL:
4543		case TYPE_FPSQRTDBL:
4544		  /* A fpload can't be issued until one cycle before a
4545		     preceding arithmetic operation has finished if
4546		     the target of the fpload is the destination of the
4547		     arithmetic operation.
4548
4549		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4550		     is 3 cycles, unless they bundle together.   We also
4551		     pay the penalty if the second insn is a fpload.  */
4552		  return insn_default_latency (dep_insn) - 1;
4553
4554		default:
4555		  return 0;
4556		}
4557	    }
4558	}
4559      else if (attr_type == TYPE_FPALU)
4560	{
4561	  rtx pat = PATTERN (insn);
4562	  rtx dep_pat = PATTERN (dep_insn);
4563	  if (GET_CODE (pat) == PARALLEL)
4564	    {
4565	      /* This happens for the fldXs,mb patterns.  */
4566	      pat = XVECEXP (pat, 0, 0);
4567	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;
4572
4573	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4574	    {
4575	      if (! recog_memoized (dep_insn))
4576		return 0;
4577	      switch (get_attr_type (dep_insn))
4578		{
4579		case TYPE_FPDIVSGL:
4580		case TYPE_FPDIVDBL:
4581		case TYPE_FPSQRTSGL:
4582		case TYPE_FPSQRTDBL:
4583		  /* An ALU flop can't be issued until two cycles before a
4584		     preceding divide or sqrt operation has finished if
4585		     the target of the ALU flop is also the target of
4586		     the divide or sqrt operation.  */
4587		  return insn_default_latency (dep_insn) - 2;
4588
4589		default:
4590		  return 0;
4591		}
4592	    }
4593	}
4594
4595      /* For other output dependencies, the cost is 0.  */
4596      return 0;
4597
4598    default:
4599      gcc_unreachable ();
4600    }
4601}
4602
/* Adjust scheduling priorities.  We use this to try to keep addil
   and the next use of %r1 close together.  */
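/* For instance (an illustrative pair), dropping the priority of
   sequences along the lines of

     addil LR'sym,%r27
     stw %r26,RR'sym(%r1)

   makes it less likely that the scheduler separates the addil from
   the instruction that consumes the %r1 it sets.  */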
4605static int
4606pa_adjust_priority (rtx insn, int priority)
4607{
4608  rtx set = single_set (insn);
4609  rtx src, dest;
4610  if (set)
4611    {
4612      src = SET_SRC (set);
4613      dest = SET_DEST (set);
4614      if (GET_CODE (src) == LO_SUM
4615	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4616	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4617	priority >>= 3;
4618
4619      else if (GET_CODE (src) == MEM
4620	       && GET_CODE (XEXP (src, 0)) == LO_SUM
4621	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4622	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4623	priority >>= 1;
4624
4625      else if (GET_CODE (dest) == MEM
4626	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
4627	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4628	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4629	priority >>= 3;
4630    }
4631  return priority;
4632}
4633
4634/* The 700 can only issue a single insn at a time.
4635   The 7XXX processors can issue two insns at a time.
4636   The 8000 can issue 4 insns at a time.  */
4637static int
4638pa_issue_rate (void)
4639{
4640  switch (pa_cpu)
4641    {
4642    case PROCESSOR_700:		return 1;
4643    case PROCESSOR_7100:	return 2;
4644    case PROCESSOR_7100LC:	return 2;
4645    case PROCESSOR_7200:	return 2;
4646    case PROCESSOR_7300:	return 2;
4647    case PROCESSOR_8000:	return 4;
4648
4649    default:
4650      gcc_unreachable ();
4651    }
4652}
4653
4654
4655
4656/* Return any length adjustment needed by INSN which already has its length
4657   computed as LENGTH.   Return zero if no adjustment is necessary.
4658
4659   For the PA: function calls, millicode calls, and backwards short
4660   conditional branches with unfilled delay slots need an adjustment by +1
4661   (to account for the NOP which will be inserted into the instruction stream).
4662
4663   Also compute the length of an inline block move here as it is too
4664   complicated to express as a length attribute in pa.md.  */
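/* For example (illustrative), a short backwards conditional branch
   such as

     comb,= %r4,%r5,L$loop

   with an unfilled delay slot reports a length of 4; the code below
   returns an extra 4 to cover the nop that will be inserted in the
   slot.  */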
4665int
4666pa_adjust_insn_length (rtx insn, int length)
4667{
4668  rtx pat = PATTERN (insn);
4669
4670  /* Jumps inside switch tables which have unfilled delay slots need
4671     adjustment.  */
4672  if (GET_CODE (insn) == JUMP_INSN
4673      && GET_CODE (pat) == PARALLEL
4674      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4675    return 4;
4676  /* Millicode insn with an unfilled delay slot.  */
4677  else if (GET_CODE (insn) == INSN
4678	   && GET_CODE (pat) != SEQUENCE
4679	   && GET_CODE (pat) != USE
4680	   && GET_CODE (pat) != CLOBBER
4681	   && get_attr_type (insn) == TYPE_MILLI)
4682    return 4;
4683  /* Block move pattern.  */
4684  else if (GET_CODE (insn) == INSN
4685	   && GET_CODE (pat) == PARALLEL
4686	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4687	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4688	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4689	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4690	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4691    return compute_movmem_length (insn) - 4;
4692  /* Block clear pattern.  */
4693  else if (GET_CODE (insn) == INSN
4694	   && GET_CODE (pat) == PARALLEL
4695	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4696	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4697	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4698	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4699    return compute_clrmem_length (insn) - 4;
4700  /* Conditional branch with an unfilled delay slot.  */
4701  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4702    {
4703      /* Adjust a short backwards conditional with an unfilled delay slot.  */
4704      if (GET_CODE (pat) == SET
4705	  && length == 4
4706	  && ! forward_branch_p (insn))
4707	return 4;
4708      else if (GET_CODE (pat) == PARALLEL
4709	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4710	       && length == 4)
4711	return 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
4715      else if (GET_CODE (pat) == PARALLEL
4716	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4717	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4718 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4719	       && length == 4
4720	       && ! forward_branch_p (insn))
4721	return 4;
4722      else
4723	return 0;
4724    }
4725  return 0;
4726}
4727
4728/* Print operand X (an rtx) in assembler syntax to file FILE.
4729   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4730   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
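
/* Usage sketch (the template text is illustrative): a pattern template
   in pa.md along the lines of

     "ldw%M1 %1,%0"

   uses the 'M' code below to append a ,mb or ,ma completer when
   operand 1 is a pre- or post-modified memory reference.  */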
4731
4732void
4733print_operand (FILE *file, rtx x, int code)
4734{
4735  switch (code)
4736    {
4737    case '#':
4738      /* Output a 'nop' if there's nothing for the delay slot.  */
4739      if (dbr_sequence_length () == 0)
4740	fputs ("\n\tnop", file);
4741      return;
4742    case '*':
      /* Output a nullification completer if there's nothing for the
	 delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0
	  || (final_sequence
	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4748        fputs (",n", file);
4749      return;
4750    case 'R':
4751      /* Print out the second register name of a register pair.
4752	 I.e., R (6) => 7.  */
4753      fputs (reg_names[REGNO (x) + 1], file);
4754      return;
4755    case 'r':
4756      /* A register or zero.  */
4757      if (x == const0_rtx
4758	  || (x == CONST0_RTX (DFmode))
4759	  || (x == CONST0_RTX (SFmode)))
4760	{
4761	  fputs ("%r0", file);
4762	  return;
4763	}
4764      else
4765	break;
4766    case 'f':
4767      /* A register or zero (floating point).  */
4768      if (x == const0_rtx
4769	  || (x == CONST0_RTX (DFmode))
4770	  || (x == CONST0_RTX (SFmode)))
4771	{
4772	  fputs ("%fr0", file);
4773	  return;
4774	}
4775      else
4776	break;
4777    case 'A':
4778      {
4779	rtx xoperands[2];
4780
4781	xoperands[0] = XEXP (XEXP (x, 0), 0);
4782	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4783	output_global_address (file, xoperands[1], 0);
4784        fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4785	return;
4786      }
4787
4788    case 'C':			/* Plain (C)ondition */
4789    case 'X':
4790      switch (GET_CODE (x))
4791	{
4792	case EQ:
4793	  fputs ("=", file);  break;
4794	case NE:
4795	  fputs ("<>", file);  break;
4796	case GT:
4797	  fputs (">", file);  break;
4798	case GE:
4799	  fputs (">=", file);  break;
4800	case GEU:
4801	  fputs (">>=", file);  break;
4802	case GTU:
4803	  fputs (">>", file);  break;
4804	case LT:
4805	  fputs ("<", file);  break;
4806	case LE:
4807	  fputs ("<=", file);  break;
4808	case LEU:
4809	  fputs ("<<=", file);  break;
4810	case LTU:
4811	  fputs ("<<", file);  break;
4812	default:
4813	  gcc_unreachable ();
4814	}
4815      return;
4816    case 'N':			/* Condition, (N)egated */
4817      switch (GET_CODE (x))
4818	{
4819	case EQ:
4820	  fputs ("<>", file);  break;
4821	case NE:
4822	  fputs ("=", file);  break;
4823	case GT:
4824	  fputs ("<=", file);  break;
4825	case GE:
4826	  fputs ("<", file);  break;
4827	case GEU:
4828	  fputs ("<<", file);  break;
4829	case GTU:
4830	  fputs ("<<=", file);  break;
4831	case LT:
4832	  fputs (">=", file);  break;
4833	case LE:
4834	  fputs (">", file);  break;
4835	case LEU:
4836	  fputs (">>", file);  break;
4837	case LTU:
4838	  fputs (">>=", file);  break;
4839	default:
4840	  gcc_unreachable ();
4841	}
4842      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired condition.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
4848    case 'Y':
4849      switch (GET_CODE (x))
4850	{
4851	case EQ:
4852	  fputs ("!=", file);  break;
4853	case NE:
4854	  fputs ("=", file);  break;
4855	case GT:
4856	  fputs ("!>", file);  break;
4857	case GE:
4858	  fputs ("!>=", file);  break;
4859	case LT:
4860	  fputs ("!<", file);  break;
4861	case LE:
4862	  fputs ("!<=", file);  break;
4863	case LTGT:
4864	  fputs ("!<>", file);  break;
4865	case UNLE:
4866	  fputs ("!?<=", file);  break;
4867	case UNLT:
4868	  fputs ("!?<", file);  break;
4869	case UNGE:
4870	  fputs ("!?>=", file);  break;
4871	case UNGT:
4872	  fputs ("!?>", file);  break;
4873	case UNEQ:
4874	  fputs ("!?=", file);  break;
4875	case UNORDERED:
4876	  fputs ("!?", file);  break;
4877	case ORDERED:
4878	  fputs ("?", file);  break;
4879	default:
4880	  gcc_unreachable ();
4881	}
4882      return;
4883    case 'S':			/* Condition, operands are (S)wapped.  */
4884      switch (GET_CODE (x))
4885	{
4886	case EQ:
4887	  fputs ("=", file);  break;
4888	case NE:
4889	  fputs ("<>", file);  break;
4890	case GT:
4891	  fputs ("<", file);  break;
4892	case GE:
4893	  fputs ("<=", file);  break;
4894	case GEU:
4895	  fputs ("<<=", file);  break;
4896	case GTU:
4897	  fputs ("<<", file);  break;
4898	case LT:
4899	  fputs (">", file);  break;
4900	case LE:
4901	  fputs (">=", file);  break;
4902	case LEU:
4903	  fputs (">>=", file);  break;
4904	case LTU:
4905	  fputs (">>", file);  break;
4906	default:
4907	  gcc_unreachable ();
4908	}
4909      return;
4910    case 'B':			/* Condition, (B)oth swapped and negate.  */
4911      switch (GET_CODE (x))
4912	{
4913	case EQ:
4914	  fputs ("<>", file);  break;
4915	case NE:
4916	  fputs ("=", file);  break;
4917	case GT:
4918	  fputs (">=", file);  break;
4919	case GE:
4920	  fputs (">", file);  break;
4921	case GEU:
4922	  fputs (">>", file);  break;
4923	case GTU:
4924	  fputs (">>=", file);  break;
4925	case LT:
4926	  fputs ("<=", file);  break;
4927	case LE:
4928	  fputs ("<", file);  break;
4929	case LEU:
4930	  fputs ("<<", file);  break;
4931	case LTU:
4932	  fputs ("<<=", file);  break;
4933	default:
4934	  gcc_unreachable ();
4935	}
4936      return;
4937    case 'k':
4938      gcc_assert (GET_CODE (x) == CONST_INT);
4939      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4940      return;
4941    case 'Q':
4942      gcc_assert (GET_CODE (x) == CONST_INT);
4943      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4944      return;
4945    case 'L':
4946      gcc_assert (GET_CODE (x) == CONST_INT);
4947      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4948      return;
4949    case 'O':
4950      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4951      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4952      return;
4953    case 'p':
4954      gcc_assert (GET_CODE (x) == CONST_INT);
4955      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4956      return;
4957    case 'P':
4958      gcc_assert (GET_CODE (x) == CONST_INT);
4959      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4960      return;
4961    case 'I':
4962      if (GET_CODE (x) == CONST_INT)
4963	fputs ("i", file);
4964      return;
4965    case 'M':
4966    case 'F':
4967      switch (GET_CODE (XEXP (x, 0)))
4968	{
4969	case PRE_DEC:
4970	case PRE_INC:
4971	  if (ASSEMBLER_DIALECT == 0)
4972	    fputs ("s,mb", file);
4973	  else
4974	    fputs (",mb", file);
4975	  break;
4976	case POST_DEC:
4977	case POST_INC:
4978	  if (ASSEMBLER_DIALECT == 0)
4979	    fputs ("s,ma", file);
4980	  else
4981	    fputs (",ma", file);
4982	  break;
4983	case PLUS:
4984	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
4985	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
4986	    {
4987	      if (ASSEMBLER_DIALECT == 0)
4988		fputs ("x", file);
4989	    }
4990	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4991		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4992	    {
4993	      if (ASSEMBLER_DIALECT == 0)
4994		fputs ("x,s", file);
4995	      else
4996		fputs (",s", file);
4997	    }
4998	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4999	    fputs ("s", file);
5000	  break;
5001	default:
5002	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5003	    fputs ("s", file);
5004	  break;
5005	}
5006      return;
5007    case 'G':
5008      output_global_address (file, x, 0);
5009      return;
5010    case 'H':
5011      output_global_address (file, x, 1);
5012      return;
5013    case 0:			/* Don't do anything special */
5014      break;
5015    case 'Z':
5016      {
5017	unsigned op[3];
5018	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
5020	return;
5021      }
5022    case 'z':
5023      {
5024	unsigned op[3];
5025	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
5027	return;
5028      }
5029    case 'c':
5030      /* We can get here from a .vtable_inherit due to our
5031	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5032	 addresses.  */
5033      break;
5034    default:
5035      gcc_unreachable ();
5036    }
5037  if (GET_CODE (x) == REG)
5038    {
5039      fputs (reg_names [REGNO (x)], file);
5040      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5041	{
5042	  fputs ("R", file);
5043	  return;
5044	}
5045      if (FP_REG_P (x)
5046	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5047	  && (REGNO (x) & 1) == 0)
5048	fputs ("L", file);
5049    }
5050  else if (GET_CODE (x) == MEM)
5051    {
5052      int size = GET_MODE_SIZE (GET_MODE (x));
5053      rtx base = NULL_RTX;
5054      switch (GET_CODE (XEXP (x, 0)))
5055	{
5056	case PRE_DEC:
5057	case POST_DEC:
5058          base = XEXP (XEXP (x, 0), 0);
5059	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5060	  break;
5061	case PRE_INC:
5062	case POST_INC:
5063          base = XEXP (XEXP (x, 0), 0);
5064	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5065	  break;
5066	case PLUS:
5067	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5068	    fprintf (file, "%s(%s)",
5069		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5070		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5071	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5072	    fprintf (file, "%s(%s)",
5073		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5074		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5075	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5076		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5077	    {
5078	      /* Because the REG_POINTER flag can get lost during reload,
5079		 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5080		 index and base registers in the combined move patterns.  */
5081	      rtx base = XEXP (XEXP (x, 0), 1);
5082	      rtx index = XEXP (XEXP (x, 0), 0);
5083
5084	      fprintf (file, "%s(%s)",
5085		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5086	    }
5087	  else
5088	    output_address (XEXP (x, 0));
5089	  break;
5090	default:
5091	  output_address (XEXP (x, 0));
5092	  break;
5093	}
5094    }
5095  else
5096    output_addr_const (file, x);
5097}
5098
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5100
5101void
5102output_global_address (FILE *file, rtx x, int round_constant)
5103{
5104
  /* Imagine (high (const (plus ...))).  */
5106  if (GET_CODE (x) == HIGH)
5107    x = XEXP (x, 0);
5108
5109  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5110    output_addr_const (file, x);
5111  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5112    {
5113      output_addr_const (file, x);
5114      fputs ("-$global$", file);
5115    }
5116  else if (GET_CODE (x) == CONST)
5117    {
5118      const char *sep = "";
5119      int offset = 0;		/* assembler wants -$global$ at end */
5120      rtx base = NULL_RTX;
5121
5122      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5123	{
5124	case SYMBOL_REF:
5125	  base = XEXP (XEXP (x, 0), 0);
5126	  output_addr_const (file, base);
5127	  break;
5128	case CONST_INT:
5129	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5130	  break;
5131	default:
5132	  gcc_unreachable ();
5133	}
5134
5135      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5136	{
5137	case SYMBOL_REF:
5138	  base = XEXP (XEXP (x, 0), 1);
5139	  output_addr_const (file, base);
5140	  break;
5141	case CONST_INT:
5142	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5143	  break;
5144	default:
5145	  gcc_unreachable ();
5146	}
5147
5148      /* How bogus.  The compiler is apparently responsible for
5149	 rounding the constant if it uses an LR field selector.
5150
5151	 The linker and/or assembler seem a better place since
5152	 they have to do this kind of thing already.
5153
5154	 If we fail to do this, HP's optimizing linker may eliminate
5155	 an addil, but not update the ldw/stw/ldo instruction that
5156	 uses the result of the addil.  */
5157      if (round_constant)
5158	offset = ((offset + 0x1000) & ~0x1fff);
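	/* For example, an offset of 0x2345 rounds to 0x2000 and 0x3001
	   rounds to 0x4000; i.e., the offset is rounded to the nearest
	   multiple of 0x2000, which is the rounding the LR field
	   selector expects.  */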
5159
5160      switch (GET_CODE (XEXP (x, 0)))
5161	{
5162	case PLUS:
5163	  if (offset < 0)
5164	    {
5165	      offset = -offset;
5166	      sep = "-";
5167	    }
5168	  else
5169	    sep = "+";
5170	  break;
5171
5172	case MINUS:
5173	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5174	  sep = "-";
5175	  break;
5176
5177	default:
5178	  gcc_unreachable ();
5179	}
5180
5181      if (!read_only_operand (base, VOIDmode) && !flag_pic)
5182	fputs ("-$global$", file);
5183      if (offset)
5184	fprintf (file, "%s%d", sep, offset);
5185    }
5186  else
5187    output_addr_const (file, x);
5188}
5189
5190/* Output boilerplate text to appear at the beginning of the file.
5191   There are several possible versions.  */
5192#define aputs(x) fputs(x, asm_out_file)
5193static inline void
5194pa_file_start_level (void)
5195{
5196  if (TARGET_64BIT)
5197    aputs ("\t.LEVEL 2.0w\n");
5198  else if (TARGET_PA_20)
5199    aputs ("\t.LEVEL 2.0\n");
5200  else if (TARGET_PA_11)
5201    aputs ("\t.LEVEL 1.1\n");
5202  else
5203    aputs ("\t.LEVEL 1.0\n");
5204}
5205
5206static inline void
5207pa_file_start_space (int sortspace)
5208{
5209  aputs ("\t.SPACE $PRIVATE$");
5210  if (sortspace)
5211    aputs (",SORT=16");
5212  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5213         "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5214         "\n\t.SPACE $TEXT$");
5215  if (sortspace)
5216    aputs (",SORT=8");
5217  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5218         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5219}
5220
5221static inline void
5222pa_file_start_file (int want_version)
5223{
5224  if (write_symbols != NO_DEBUG)
5225    {
5226      output_file_directive (asm_out_file, main_input_filename);
5227      if (want_version)
5228	aputs ("\t.version\t\"01.01\"\n");
5229    }
5230}
5231
5232static inline void
5233pa_file_start_mcount (const char *aswhat)
5234{
5235  if (profile_flag)
5236    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5237}
5238
5239static void
5240pa_elf_file_start (void)
5241{
5242  pa_file_start_level ();
5243  pa_file_start_mcount ("ENTRY");
5244  pa_file_start_file (0);
5245}
5246
5247static void
5248pa_som_file_start (void)
5249{
5250  pa_file_start_level ();
5251  pa_file_start_space (0);
5252  aputs ("\t.IMPORT $global$,DATA\n"
5253         "\t.IMPORT $$dyncall,MILLICODE\n");
5254  pa_file_start_mcount ("CODE");
5255  pa_file_start_file (0);
5256}
5257
5258static void
5259pa_linux_file_start (void)
5260{
5261  pa_file_start_file (0);
5262  pa_file_start_level ();
5263  pa_file_start_mcount ("CODE");
5264}
5265
5266static void
5267pa_hpux64_gas_file_start (void)
5268{
5269  pa_file_start_level ();
5270#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5271  if (profile_flag)
5272    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5273#endif
5274  pa_file_start_file (1);
5275}
5276
5277static void
5278pa_hpux64_hpas_file_start (void)
5279{
5280  pa_file_start_level ();
5281  pa_file_start_space (1);
5282  pa_file_start_mcount ("CODE");
5283  pa_file_start_file (0);
5284}
5285#undef aputs
5286
5287/* Search the deferred plabel list for SYMBOL and return its internal
5288   label.  If an entry for SYMBOL is not found, a new entry is created.  */
5289
5290rtx
5291get_deferred_plabel (rtx symbol)
5292{
5293  const char *fname = XSTR (symbol, 0);
5294  size_t i;
5295
  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow, replace it with something faster.  */
5299  for (i = 0; i < n_deferred_plabels; i++)
5300    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5301      break;
5302
5303  /* If the deferred plabel list is empty, or this entry was not found
5304     on the list, create a new entry on the list.  */
5305  if (deferred_plabels == NULL || i == n_deferred_plabels)
5306    {
5307      tree id;
5308
5309      if (deferred_plabels == 0)
5310	deferred_plabels = (struct deferred_plabel *)
5311	  ggc_alloc (sizeof (struct deferred_plabel));
5312      else
5313	deferred_plabels = (struct deferred_plabel *)
5314	  ggc_realloc (deferred_plabels,
5315		       ((n_deferred_plabels + 1)
5316			* sizeof (struct deferred_plabel)));
5317
5318      i = n_deferred_plabels++;
5319      deferred_plabels[i].internal_label = gen_label_rtx ();
5320      deferred_plabels[i].symbol = symbol;
5321
5322      /* Gross.  We have just implicitly taken the address of this
5323	 function.  Mark it in the same manner as assemble_name.  */
5324      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5325      if (id)
5326	mark_referenced (id);
5327    }
5328
5329  return deferred_plabels[i].internal_label;
5330}
5331
5332static void
5333output_deferred_plabels (void)
5334{
5335  size_t i;
  /* If we have deferred plabels, then we need to switch into the data
     section and align it to a word boundary (4 bytes in the 32-bit
     runtime, 8 bytes in the 64-bit runtime) before we output the
     deferred plabels.  */
5339  if (n_deferred_plabels)
5340    {
5341      data_section ();
5342      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5343    }
5344
5345  /* Now output the deferred plabels.  */
5346  for (i = 0; i < n_deferred_plabels; i++)
5347    {
5348      (*targetm.asm_out.internal_label) (asm_out_file, "L",
5349		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5350      assemble_integer (deferred_plabels[i].symbol,
5351			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5352    }
5353}
5354
5355#ifdef HPUX_LONG_DOUBLE_LIBRARY
5356/* Initialize optabs to point to HPUX long double emulation routines.  */
5357static void
5358pa_hpux_init_libfuncs (void)
5359{
5360  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5361  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5362  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5363  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5364  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5365  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5366  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5367  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5368  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5369
5370  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5371  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5372  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5373  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5374  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5375  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5376  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5377
5378  set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5379  set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5380  set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5381  set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5382
5383  set_conv_libfunc (sfix_optab,   SImode, TFmode, TARGET_64BIT
5384						  ? "__U_Qfcnvfxt_quad_to_sgl"
5385						  : "_U_Qfcnvfxt_quad_to_sgl");
5386  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5387  set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5388  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5389
5390  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5391  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5392}
5393#endif
5394
5395/* HP's millicode routines mean something special to the assembler.
5396   Keep track of which ones we have used.  */
5397
5398enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5399static void import_milli (enum millicodes);
5400static char imported[(int) end1000];
5401static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5402static const char import_string[] = ".IMPORT $$....,MILLICODE";
5403#define MILLI_START 10
5404
5405static void
5406import_milli (enum millicodes code)
5407{
5408  char str[sizeof (import_string)];
5409
5410  if (!imported[(int) code])
5411    {
5412      imported[(int) code] = 1;
5413      strcpy (str, import_string);
5414      strncpy (str + MILLI_START, milli_names[(int) code], 4);
5415      output_asm_insn (str, 0);
5416    }
5417}
5418
5419/* The register constraints have put the operands and return value in
5420   the proper registers.  */
5421
5422const char *
5423output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5424{
5425  import_milli (mulI);
5426  return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5427}
5428
5429/* Emit the rtl for doing a division by a constant.  */
5430
5431/* Do magic division millicodes exist for this value? */
5432const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
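
/* So, of the divisors 0..15, magic millicode entry points exist for
   3, 5, 6, 7, 9, 10, 12, 14 and 15.  Divisions by powers of two are
   normally strength-reduced to shifts before this point, and the
   remaining divisors go through the generic $$divI/$$divU routines.  */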
5433
5434/* We'll use an array to keep track of the magic millicodes and
5435   whether or not we've used them already. [n][0] is signed, [n][1] is
5436   unsigned.  */
5437
5438static int div_milli[16][2];
5439
5440int
5441emit_hpdiv_const (rtx *operands, int unsignedp)
5442{
5443  if (GET_CODE (operands[2]) == CONST_INT
5444      && INTVAL (operands[2]) > 0
5445      && INTVAL (operands[2]) < 16
5446      && magic_milli[INTVAL (operands[2])])
5447    {
5448      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5449
5450      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5451      emit
5452	(gen_rtx_PARALLEL
5453	 (VOIDmode,
5454	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5455				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5456						     SImode,
5457						     gen_rtx_REG (SImode, 26),
5458						     operands[2])),
5459		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5460		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5461		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5462		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5463		     gen_rtx_CLOBBER (VOIDmode, ret))));
5464      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5465      return 1;
5466    }
5467  return 0;
5468}
5469
5470const char *
5471output_div_insn (rtx *operands, int unsignedp, rtx insn)
5472{
5473  HOST_WIDE_INT divisor;
5474
  /* If the divisor is a constant, try to use one of the special
     opcodes.  */
5477  if (GET_CODE (operands[0]) == CONST_INT)
5478    {
5479      static char buf[100];
5480      divisor = INTVAL (operands[0]);
5481      if (!div_milli[divisor][unsignedp])
5482	{
5483	  div_milli[divisor][unsignedp] = 1;
5484	  if (unsignedp)
5485	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5486	  else
5487	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5488	}
5489      if (unsignedp)
5490	{
5491	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5492		   INTVAL (operands[0]));
5493	  return output_millicode_call (insn,
5494					gen_rtx_SYMBOL_REF (SImode, buf));
5495	}
5496      else
5497	{
5498	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5499		   INTVAL (operands[0]));
5500	  return output_millicode_call (insn,
5501					gen_rtx_SYMBOL_REF (SImode, buf));
5502	}
5503    }
5504  /* Divisor isn't a special constant.  */
5505  else
5506    {
5507      if (unsignedp)
5508	{
5509	  import_milli (divU);
5510	  return output_millicode_call (insn,
5511					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5512	}
5513      else
5514	{
5515	  import_milli (divI);
5516	  return output_millicode_call (insn,
5517					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5518	}
5519    }
5520}
5521
5522/* Output a $$rem millicode to do mod.  */
5523
5524const char *
5525output_mod_insn (int unsignedp, rtx insn)
5526{
5527  if (unsignedp)
5528    {
5529      import_milli (remU);
5530      return output_millicode_call (insn,
5531				    gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5532    }
5533  else
5534    {
5535      import_milli (remI);
5536      return output_millicode_call (insn,
5537				    gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5538    }
5539}
5540
5541void
5542output_arg_descriptor (rtx call_insn)
5543{
5544  const char *arg_regs[4];
5545  enum machine_mode arg_mode;
5546  rtx link;
5547  int i, output_flag = 0;
5548  int regno;
5549
  /* We neither need nor want argument location descriptors for the
     64-bit runtime environment or the ELF32 environment.  */
5552  if (TARGET_64BIT || TARGET_ELF32)
5553    return;
5554
5555  for (i = 0; i < 4; i++)
5556    arg_regs[i] = 0;
5557
5558  /* Specify explicitly that no argument relocations should take place
5559     if using the portable runtime calling conventions.  */
5560  if (TARGET_PORTABLE_RUNTIME)
5561    {
5562      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5563	     asm_out_file);
5564      return;
5565    }
5566
5567  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5568  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5569       link; link = XEXP (link, 1))
5570    {
5571      rtx use = XEXP (link, 0);
5572
5573      if (! (GET_CODE (use) == USE
5574	     && GET_CODE (XEXP (use, 0)) == REG
5575	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5576	continue;
5577
5578      arg_mode = GET_MODE (XEXP (use, 0));
5579      regno = REGNO (XEXP (use, 0));
5580      if (regno >= 23 && regno <= 26)
5581	{
5582	  arg_regs[26 - regno] = "GR";
5583	  if (arg_mode == DImode)
5584	    arg_regs[25 - regno] = "GR";
5585	}
5586      else if (regno >= 32 && regno <= 39)
5587	{
5588	  if (arg_mode == SFmode)
5589	    arg_regs[(regno - 32) / 2] = "FR";
5590	  else
5591	    {
5592#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5593	      arg_regs[(regno - 34) / 2] = "FR";
5594	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5595#else
5596	      arg_regs[(regno - 34) / 2] = "FU";
5597	      arg_regs[(regno - 34) / 2 + 1] = "FR";
5598#endif
5599	    }
5600	}
5601    }
5602  fputs ("\t.CALL ", asm_out_file);
5603  for (i = 0; i < 4; i++)
5604    {
5605      if (arg_regs[i])
5606	{
5607	  if (output_flag++)
5608	    fputc (',', asm_out_file);
5609	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5610	}
5611    }
5612  fputc ('\n', asm_out_file);
5613}
5614
5615/* Return the class of any secondary reload register that is needed to
5616   move IN into a register in class CLASS using mode MODE.
5617
   Profiling has shown that this routine and its descendants account for
   a significant amount of compile time (~7%).  So it has been
   optimized to reduce redundant computations and eliminate useless
   function calls.

   It might be worthwhile to try to make this a leaf function too.  */
5624
5625enum reg_class
5626secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5627{
5628  int regno, is_symbolic;
5629
5630  /* Trying to load a constant into a FP register during PIC code
5631     generation will require %r1 as a scratch register.  */
5632  if (flag_pic
5633      && GET_MODE_CLASS (mode) == MODE_INT
5634      && FP_REG_CLASS_P (class)
5635      && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5636    return R1_REGS;
5637
5638  /* Profiling showed the PA port spends about 1.3% of its compilation
5639     time in true_regnum from calls inside secondary_reload_class.  */
5640
5641  if (GET_CODE (in) == REG)
5642    {
5643      regno = REGNO (in);
5644      if (regno >= FIRST_PSEUDO_REGISTER)
5645	regno = true_regnum (in);
5646    }
5647  else if (GET_CODE (in) == SUBREG)
5648    regno = true_regnum (in);
5649  else
5650    regno = -1;
5651
  /* If we have something like (mem (mem (...))), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
5655  if (GET_CODE (in) == MEM
5656      && GET_CODE (XEXP (in, 0)) == MEM)
5657    return NO_REGS;
5658
5659  /* Handle out of range displacement for integer mode loads/stores of
5660     FP registers.  */
5661  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5662       && GET_MODE_CLASS (mode) == MODE_INT
5663       && FP_REG_CLASS_P (class))
5664      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5665    return GENERAL_REGS;
5666
5667  /* A SAR<->FP register copy requires a secondary register (GPR) as
5668     well as secondary memory.  */
5669  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5670      && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5671	  || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5672    return GENERAL_REGS;
5673
5674  if (GET_CODE (in) == HIGH)
5675    in = XEXP (in, 0);
5676
  /* Profiling has shown that GCC spends about 2.6% of its compilation
5678     time in symbolic_operand from calls inside secondary_reload_class.
5679
5680     We use an inline copy and only compute its return value once to avoid
5681     useless work.  */
5682  switch (GET_CODE (in))
5683    {
5684      rtx tmp;
5685
5686      case SYMBOL_REF:
5687      case LABEL_REF:
5688        is_symbolic = 1;
5689        break;
5690      case CONST:
5691	tmp = XEXP (in, 0);
5692	is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5693			|| GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5694		       && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5695        break;
5696
5697      default:
5698        is_symbolic = 0;
5699        break;
5700    }
5701
5702  if (!flag_pic
5703      && is_symbolic
5704      && read_only_operand (in, VOIDmode))
5705    return NO_REGS;
5706
5707  if (class != R1_REGS && is_symbolic)
5708    return R1_REGS;
5709
5710  return NO_REGS;
5711}
5712
5713/* In the 32-bit runtime, arguments larger than eight bytes are passed
5714   by invisible reference.  As a GCC extension, we also pass anything
5715   with a zero or variable size by reference.
5716
5717   The 64-bit runtime does not describe passing any types by invisible
5718   reference.  The internals of GCC can't currently handle passing
5719   empty structures, and zero or variable length arrays when they are
5720   not passed entirely on the stack or by reference.  Thus, as a GCC
5721   extension, we pass these types by reference.  The HP compiler doesn't
5722   support these types, so hopefully there shouldn't be any compatibility
5723   issues.  This may have to be revisited when HP releases a C99 compiler
5724   or updates the ABI.  */
5725
5726static bool
5727pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5728		      enum machine_mode mode, tree type,
5729		      bool named ATTRIBUTE_UNUSED)
5730{
5731  HOST_WIDE_INT size;
5732
5733  if (type)
5734    size = int_size_in_bytes (type);
5735  else
5736    size = GET_MODE_SIZE (mode);
5737
5738  if (TARGET_64BIT)
5739    return size <= 0;
5740  else
5741    return size <= 0 || size > 8;
5742}
5743
5744enum direction
5745function_arg_padding (enum machine_mode mode, tree type)
5746{
5747  if (mode == BLKmode
5748      || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5749    {
5750      /* Return none if justification is not required.  */
5751      if (type
5752	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5753	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5754	return none;
5755
5756      /* The directions set here are ignored when a BLKmode argument larger
5757	 than a word is placed in a register.  Different code is used for
5758	 the stack and registers.  This makes it difficult to have a
5759	 consistent data representation for both the stack and registers.
5760	 For both runtimes, the justification and padding for arguments on
5761	 the stack and in registers should be identical.  */
5762      if (TARGET_64BIT)
5763	/* The 64-bit runtime specifies left justification for aggregates.  */
5764        return upward;
5765      else
5766	/* The 32-bit runtime architecture specifies right justification.
5767	   When the argument is passed on the stack, the argument is padded
5768	   with garbage on the left.  The HP compiler pads with zeros.  */
5769	return downward;
5770    }
5771
5772  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5773    return downward;
5774  else
5775    return none;
5776}
5777
5778
5779/* Do what is necessary for `va_start'.  We look at the current function
5780   to determine if stdargs or varargs is used and fill in an initial
5781   va_list.  A pointer to this constructor is returned.  */
5782
5783static rtx
5784hppa_builtin_saveregs (void)
5785{
5786  rtx offset, dest;
5787  tree fntype = TREE_TYPE (current_function_decl);
5788  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5789		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5790		       != void_type_node)))
5791		? UNITS_PER_WORD : 0);
5792
5793  if (argadj)
5794    offset = plus_constant (current_function_arg_offset_rtx, argadj);
5795  else
5796    offset = current_function_arg_offset_rtx;
5797
5798  if (TARGET_64BIT)
5799    {
5800      int i, off;
5801
5802      /* Adjust for varargs/stdarg differences.  */
5803      if (argadj)
5804	offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5805      else
5806	offset = current_function_arg_offset_rtx;
5807
5808      /* We need to save %r26 .. %r19 inclusive starting at offset -64
5809	 from the incoming arg pointer and growing to larger addresses.  */
5810      for (i = 26, off = -64; i >= 19; i--, off += 8)
5811	emit_move_insn (gen_rtx_MEM (word_mode,
5812				     plus_constant (arg_pointer_rtx, off)),
5813			gen_rtx_REG (word_mode, i));
5814
5815      /* The incoming args pointer points just beyond the flushback area;
5816	 normally this is not a serious concern.  However, when we are doing
5817	 varargs/stdargs we want to make the arg pointer point to the start
5818	 of the incoming argument area.  */
5819      emit_move_insn (virtual_incoming_args_rtx,
5820		      plus_constant (arg_pointer_rtx, -64));
5821
5822      /* Now return a pointer to the first anonymous argument.  */
5823      return copy_to_reg (expand_binop (Pmode, add_optab,
5824					virtual_incoming_args_rtx,
5825					offset, 0, 0, OPTAB_LIB_WIDEN));
5826    }
5827
5828  /* Store general registers on the stack.  */
5829  dest = gen_rtx_MEM (BLKmode,
5830		      plus_constant (current_function_internal_arg_pointer,
5831				     -16));
5832  set_mem_alias_set (dest, get_varargs_alias_set ());
5833  set_mem_align (dest, BITS_PER_WORD);
5834  move_block_from_reg (23, dest, 4);
5835
5836  /* move_block_from_reg will emit code to store the argument registers
5837     individually as scalar stores.
5838
5839     However, other insns may later load from the same addresses for
5840     a structure load (passing a struct to a varargs routine).
5841
5842     The alias code assumes that such aliasing can never happen, so we
5843     have to keep memory referencing insns from moving up beyond the
5844     last argument register store.  So we emit a blockage insn here.  */
5845  emit_insn (gen_blockage ());
5846
5847  return copy_to_reg (expand_binop (Pmode, add_optab,
5848				    current_function_internal_arg_pointer,
5849				    offset, 0, 0, OPTAB_LIB_WIDEN));
5850}
5851
5852void
5853hppa_va_start (tree valist, rtx nextarg)
5854{
5855  nextarg = expand_builtin_saveregs ();
5856  std_expand_builtin_va_start (valist, nextarg);
5857}
5858
5859static tree
5860hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5861{
5862  if (TARGET_64BIT)
5863    {
5864      /* Args grow upward.  We can use the generic routines.  */
5865      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5866    }
5867  else /* !TARGET_64BIT */
5868    {
5869      tree ptr = build_pointer_type (type);
5870      tree valist_type;
5871      tree t, u;
5872      unsigned int size, ofs;
5873      bool indirect;
5874
5875      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5876      if (indirect)
5877	{
5878	  type = ptr;
5879	  ptr = build_pointer_type (type);
5880	}
5881      size = int_size_in_bytes (type);
5882      valist_type = TREE_TYPE (valist);
5883
5884      /* Args grow down.  Not handled by generic routines.  */
5885
5886      u = fold_convert (valist_type, size_in_bytes (type));
5887      t = build (MINUS_EXPR, valist_type, valist, u);
5888
5889      /* Copied from va-pa.h, but we probably don't need to align to
5890	 word size, since we generate and preserve that invariant.  */
5891      u = build_int_cst (valist_type, (size > 4 ? -8 : -4));
5892      t = build (BIT_AND_EXPR, valist_type, t, u);
5893
5894      t = build (MODIFY_EXPR, valist_type, valist, t);
5895
5896      ofs = (8 - size) % 4;
5897      if (ofs != 0)
5898	{
5899	  u = fold_convert (valist_type, size_int (ofs));
5900	  t = build (PLUS_EXPR, valist_type, t, u);
5901	}
5902
5903      t = fold_convert (ptr, t);
5904      t = build_va_arg_indirect_ref (t);
5905
5906      if (indirect)
5907	t = build_va_arg_indirect_ref (t);
5908
5909      return t;
5910    }
5911}
5912
5913/* True if MODE is valid for the target.  By "valid", we mean able to
5914   be manipulated in non-trivial ways.  In particular, this means all
5915   the arithmetic is supported.
5916
5917   Currently, TImode is not valid as the HP 64-bit runtime documentation
5918   doesn't document the alignment and calling conventions for this type.
5919   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
5921
5922static bool
5923pa_scalar_mode_supported_p (enum machine_mode mode)
5924{
5925  int precision = GET_MODE_PRECISION (mode);
5926
5927  switch (GET_MODE_CLASS (mode))
5928    {
5929    case MODE_PARTIAL_INT:
5930    case MODE_INT:
5931      if (precision == CHAR_TYPE_SIZE)
5932	return true;
5933      if (precision == SHORT_TYPE_SIZE)
5934	return true;
5935      if (precision == INT_TYPE_SIZE)
5936	return true;
5937      if (precision == LONG_TYPE_SIZE)
5938	return true;
5939      if (precision == LONG_LONG_TYPE_SIZE)
5940	return true;
5941      return false;
5942
5943    case MODE_FLOAT:
5944      if (precision == FLOAT_TYPE_SIZE)
5945	return true;
5946      if (precision == DOUBLE_TYPE_SIZE)
5947	return true;
5948      if (precision == LONG_DOUBLE_TYPE_SIZE)
5949	return true;
5950      return false;
5951
5952    default:
5953      gcc_unreachable ();
5954    }
5955}
5956
5957/* This routine handles all the normal conditional branch sequences we
5958   might need to generate.  It handles compare immediate vs compare
5959   register, nullification of delay slots, varying length branches,
5960   negated branches, and all combinations of the above.  It returns the
5961   output appropriate to emit the branch corresponding to all given
5962   parameters.  */
5963
5964const char *
5965output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
5966{
5967  static char buf[100];
5968  int useskip = 0;
5969  rtx xoperands[5];
5970
5971  /* A conditional branch to the following instruction (e.g. the delay slot)
5972     is asking for a disaster.  This can happen when not optimizing and
5973     when jump optimization fails.
5974
5975     While it is usually safe to emit nothing, this can fail if the
5976     preceding instruction is a nullified branch with an empty delay
5977     slot and the same branch target as this branch.  We could check
5978     for this but jump optimization should eliminate nop jumps.  It
5979     is always safe to emit a nop.  */
5980  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5981    return "nop";
5982
5983  /* The doubleword form of the cmpib instruction doesn't have the LEU
5984     and GTU conditions while the cmpb instruction does.  Since we accept
5985     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
5986  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
5987    operands[2] = gen_rtx_REG (DImode, 0);
5988  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
5989    operands[1] = gen_rtx_REG (DImode, 0);
5990
5991  /* If this is a long branch with its delay slot unfilled, set `nullify'
5992     as it can nullify the delay slot and save a nop.  */
5993  if (length == 8 && dbr_sequence_length () == 0)
5994    nullify = 1;
5995
5996  /* If this is a short forward conditional branch which did not get
5997     its delay slot filled, the delay slot can still be nullified.  */
5998  if (! nullify && length == 4 && dbr_sequence_length () == 0)
5999    nullify = forward_branch_p (insn);
6000
6001  /* A forward branch over a single nullified insn can be done with a
6002     comclr instruction.  This avoids a single cycle penalty due to
6003     mis-predicted branch if we fall through (branch not taken).  */
6004  if (length == 4
6005      && next_real_insn (insn) != 0
6006      && get_attr_length (next_real_insn (insn)) == 4
6007      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6008      && nullify)
6009    useskip = 1;
6010
6011  switch (length)
6012    {
6013      /* All short conditional branches except backwards with an unfilled
6014	 delay slot.  */
6015      case 4:
6016	if (useskip)
6017	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6018	else
6019	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6020	if (GET_MODE (operands[1]) == DImode)
6021	  strcat (buf, "*");
6022	if (negated)
6023	  strcat (buf, "%B3");
6024	else
6025	  strcat (buf, "%S3");
6026	if (useskip)
6027	  strcat (buf, " %2,%r1,%%r0");
6028	else if (nullify)
6029	  strcat (buf, ",n %2,%r1,%0");
6030	else
6031	  strcat (buf, " %2,%r1,%0");
6032	break;
6033
6034     /* All long conditionals.  Note a short backward branch with an
6035	unfilled delay slot is treated just like a long backward branch
6036	with an unfilled delay slot.  */
6037      case 8:
6038	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
6040	if (dbr_sequence_length () != 0
6041	    && ! forward_branch_p (insn)
6042	    && nullify)
6043	  {
6044	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6045	    if (GET_MODE (operands[1]) == DImode)
6046	      strcat (buf, "*");
6047	    if (negated)
6048	      strcat (buf, "%S3");
6049	    else
6050	      strcat (buf, "%B3");
6051	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6052	  }
6053	/* Handle short backwards branch with an unfilled delay slot.
6054	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6055	   taken and untaken branches.  */
6056	else if (dbr_sequence_length () == 0
6057		 && ! forward_branch_p (insn)
6058		 && INSN_ADDRESSES_SET_P ()
6059		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6060				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6061	  {
6062	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6063	    if (GET_MODE (operands[1]) == DImode)
6064	      strcat (buf, "*");
6065	    if (negated)
6066	      strcat (buf, "%B3 %2,%r1,%0%#");
6067	    else
6068	      strcat (buf, "%S3 %2,%r1,%0%#");
6069	  }
6070	else
6071	  {
6072	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6073	    if (GET_MODE (operands[1]) == DImode)
6074	      strcat (buf, "*");
6075	    if (negated)
6076	      strcat (buf, "%S3");
6077	    else
6078	      strcat (buf, "%B3");
6079	    if (nullify)
6080	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6081	    else
6082	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6083	  }
6084	break;
6085
6086      case 20:
6087      case 28:
6088	xoperands[0] = operands[0];
6089	xoperands[1] = operands[1];
6090	xoperands[2] = operands[2];
6091	xoperands[3] = operands[3];
6092
6093	/* The reversed conditional branch must branch over one additional
6094	   instruction if the delay slot is filled.  If the delay slot
6095	   is empty, the instruction after the reversed condition branch
6096	   must be nullified.  */
6097	nullify = dbr_sequence_length () == 0;
6098	xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6099
6100	/* Create a reversed conditional branch which branches around
6101	   the following insns.  */
6102	if (GET_MODE (operands[1]) != DImode)
6103	  {
6104	    if (nullify)
6105	      {
6106		if (negated)
6107		  strcpy (buf,
6108		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6109		else
6110		  strcpy (buf,
6111		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6112	      }
6113	    else
6114	      {
6115		if (negated)
6116		  strcpy (buf,
6117		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6118		else
6119		  strcpy (buf,
6120		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6121	      }
6122	  }
6123	else
6124	  {
6125	    if (nullify)
6126	      {
6127		if (negated)
6128		  strcpy (buf,
6129		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6130		else
6131		  strcpy (buf,
6132		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6133	      }
6134	    else
6135	      {
6136		if (negated)
6137		  strcpy (buf,
6138		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6139		else
6140		  strcpy (buf,
6141		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6142	      }
6143	  }
6144
6145	output_asm_insn (buf, xoperands);
6146	return output_lbranch (operands[0], insn);
6147
6148      default:
6149	gcc_unreachable ();
6150    }
6151  return buf;
6152}
6153
6154/* This routine handles long unconditional branches that exceed the
6155   maximum range of a simple branch instruction.  */
6156
6157const char *
6158output_lbranch (rtx dest, rtx insn)
6159{
6160  rtx xoperands[2];
6161
6162  xoperands[0] = dest;
6163
6164  /* First, free up the delay slot.  */
6165  if (dbr_sequence_length () != 0)
6166    {
6167      /* We can't handle a jump in the delay slot.  */
6168      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6169
6170      final_scan_insn (NEXT_INSN (insn), asm_out_file,
6171		       optimize, 0, NULL);
6172
6173      /* Now delete the delay insn.  */
6174      PUT_CODE (NEXT_INSN (insn), NOTE);
6175      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6176      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6177    }
6178
6179  /* Output an insn to save %r1.  The runtime documentation doesn't
6180     specify whether the "Clean Up" slot in the callers frame can
6181     be clobbered by the callee.  It isn't copied by HP's builtin
6182     alloca, so this suggests that it can be clobbered if necessary.
6183     The "Static Link" location is copied by HP builtin alloca, so
6184     we avoid using it.  Using the cleanup slot might be a problem
6185     if we have to interoperate with languages that pass cleanup
6186     information.  However, it should be possible to handle these
6187     situations with GCC's asm feature.
6188
6189     The "Current RP" slot is reserved for the called procedure, so
6190     we try to use it when we don't have a frame of our own.  It's
6191     rather unlikely that we won't have a frame when we need to emit
6192     a very long branch.
6193
     Really the way to go long term is a register scavenger; go to
6195     the target of the jump and find a register which we can use
6196     as a scratch to hold the value in %r1.  Then, we wouldn't have
6197     to free up the delay slot or clobber a slot that may be needed
6198     for other purposes.  */
6199  if (TARGET_64BIT)
6200    {
6201      if (actual_fsize == 0 && !regs_ever_live[2])
6202	/* Use the return pointer slot in the frame marker.  */
6203	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6204      else
6205	/* Use the slot at -40 in the frame marker since HP builtin
6206	   alloca doesn't copy it.  */
6207	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6208    }
6209  else
6210    {
6211      if (actual_fsize == 0 && !regs_ever_live[2])
6212	/* Use the return pointer slot in the frame marker.  */
6213	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6214      else
6215	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6216	   the only other use of this location is for copying a
6217	   floating point double argument from a floating-point
6218	   register to two general registers.  The copy is done
6219	   as an "atomic" operation when outputting a call, so it
6220	   won't interfere with our using the location here.  */
6221	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6222    }
6223
6224  if (TARGET_PORTABLE_RUNTIME)
6225    {
6226      output_asm_insn ("ldil L'%0,%%r1", xoperands);
6227      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6228      output_asm_insn ("bv %%r0(%%r1)", xoperands);
6229    }
6230  else if (flag_pic)
6231    {
6232      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6233      if (TARGET_SOM || !TARGET_GAS)
6234	{
6235	  xoperands[1] = gen_label_rtx ();
6236	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6237	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
6238					     CODE_LABEL_NUMBER (xoperands[1]));
6239	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6240	}
6241      else
6242	{
6243	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6244	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6245	}
6246      output_asm_insn ("bv %%r0(%%r1)", xoperands);
6247    }
6248  else
6249    /* Now output a very long branch to the original target.  */
6250    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6251
6252  /* Now restore the value of %r1 in the delay slot.  */
6253  if (TARGET_64BIT)
6254    {
6255      if (actual_fsize == 0 && !regs_ever_live[2])
6256	return "ldd -16(%%r30),%%r1";
6257      else
6258	return "ldd -40(%%r30),%%r1";
6259    }
6260  else
6261    {
6262      if (actual_fsize == 0 && !regs_ever_live[2])
6263	return "ldw -20(%%r30),%%r1";
6264      else
6265	return "ldw -12(%%r30),%%r1";
6266    }
6267}
6268
6269/* This routine handles all the branch-on-bit conditional branch sequences we
6270   might need to generate.  It handles nullification of delay slots,
6271   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */
6273
6274const char *
6275output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6276	   int negated, rtx insn, int which)
6277{
6278  static char buf[100];
6279  int useskip = 0;
6280
6281  /* A conditional branch to the following instruction (e.g. the delay slot) is
6282     asking for a disaster.  I do not think this can happen as this pattern
6283     is only used when optimizing; jump optimization should eliminate the
6284     jump.  But be prepared just in case.  */
6285
6286  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6287    return "nop";
6288
6289  /* If this is a long branch with its delay slot unfilled, set `nullify'
6290     as it can nullify the delay slot and save a nop.  */
6291  if (length == 8 && dbr_sequence_length () == 0)
6292    nullify = 1;
6293
6294  /* If this is a short forward conditional branch which did not get
6295     its delay slot filled, the delay slot can still be nullified.  */
6296  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6297    nullify = forward_branch_p (insn);
6298
  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
6301     mis-predicted branch if we fall through (branch not taken).  */
6302
6303  if (length == 4
6304      && next_real_insn (insn) != 0
6305      && get_attr_length (next_real_insn (insn)) == 4
6306      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6307      && nullify)
6308    useskip = 1;
6309
6310  switch (length)
6311    {
6312
6313      /* All short conditional branches except backwards with an unfilled
6314	 delay slot.  */
6315      case 4:
6316	if (useskip)
6317	  strcpy (buf, "{extrs,|extrw,s,}");
6318	else
6319	  strcpy (buf, "bb,");
6320	if (useskip && GET_MODE (operands[0]) == DImode)
6321	  strcpy (buf, "extrd,s,*");
6322	else if (GET_MODE (operands[0]) == DImode)
6323	  strcpy (buf, "bb,*");
6324	if ((which == 0 && negated)
6325	     || (which == 1 && ! negated))
6326	  strcat (buf, ">=");
6327	else
6328	  strcat (buf, "<");
6329	if (useskip)
6330	  strcat (buf, " %0,%1,1,%%r0");
6331	else if (nullify && negated)
6332	  strcat (buf, ",n %0,%1,%3");
6333	else if (nullify && ! negated)
6334	  strcat (buf, ",n %0,%1,%2");
6335	else if (! nullify && negated)
6336	  strcat (buf, "%0,%1,%3");
6337	else if (! nullify && ! negated)
6338	  strcat (buf, " %0,%1,%2");
6339	break;
6340
6341     /* All long conditionals.  Note a short backward branch with an
6342	unfilled delay slot is treated just like a long backward branch
6343	with an unfilled delay slot.  */
6344      case 8:
6345	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
6347	if (dbr_sequence_length () != 0
6348	    && ! forward_branch_p (insn)
6349	    && nullify)
6350	  {
6351	    strcpy (buf, "bb,");
6352	    if (GET_MODE (operands[0]) == DImode)
6353	      strcat (buf, "*");
6354	    if ((which == 0 && negated)
6355		|| (which == 1 && ! negated))
6356	      strcat (buf, "<");
6357	    else
6358	      strcat (buf, ">=");
6359	    if (negated)
6360	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
6361	    else
6362	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
6363	  }
6364	/* Handle short backwards branch with an unfilled delay slot.
6365	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6366	   taken and untaken branches.  */
6367	else if (dbr_sequence_length () == 0
6368		 && ! forward_branch_p (insn)
6369		 && INSN_ADDRESSES_SET_P ()
6370		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6371				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6372	  {
6373	    strcpy (buf, "bb,");
6374	    if (GET_MODE (operands[0]) == DImode)
6375	      strcat (buf, "*");
6376	    if ((which == 0 && negated)
6377		|| (which == 1 && ! negated))
6378	      strcat (buf, ">=");
6379	    else
6380	      strcat (buf, "<");
6381	    if (negated)
6382	      strcat (buf, " %0,%1,%3%#");
6383	    else
6384	      strcat (buf, " %0,%1,%2%#");
6385	  }
6386	else
6387	  {
6388	    strcpy (buf, "{extrs,|extrw,s,}");
6389	    if (GET_MODE (operands[0]) == DImode)
6390	      strcpy (buf, "extrd,s,*");
6391	    if ((which == 0 && negated)
6392		|| (which == 1 && ! negated))
6393	      strcat (buf, "<");
6394	    else
6395	      strcat (buf, ">=");
6396	    if (nullify && negated)
6397	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6398	    else if (nullify && ! negated)
6399	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6400	    else if (negated)
6401	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6402	    else
6403	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6404	  }
6405	break;
6406
6407      default:
6408	gcc_unreachable ();
6409    }
6410  return buf;
6411}
6412
6413/* This routine handles all the branch-on-variable-bit conditional branch
6414   sequences we might need to generate.  It handles nullification of delay
6415   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
6417   branch.  */
6418
6419const char *
6420output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6421	    int negated, rtx insn, int which)
6422{
6423  static char buf[100];
6424  int useskip = 0;
6425
6426  /* A conditional branch to the following instruction (e.g. the delay slot) is
6427     asking for a disaster.  I do not think this can happen as this pattern
6428     is only used when optimizing; jump optimization should eliminate the
6429     jump.  But be prepared just in case.  */
6430
6431  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6432    return "nop";
6433
6434  /* If this is a long branch with its delay slot unfilled, set `nullify'
6435     as it can nullify the delay slot and save a nop.  */
6436  if (length == 8 && dbr_sequence_length () == 0)
6437    nullify = 1;
6438
6439  /* If this is a short forward conditional branch which did not get
6440     its delay slot filled, the delay slot can still be nullified.  */
6441  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6442    nullify = forward_branch_p (insn);
6443
  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
6446     mis-predicted branch if we fall through (branch not taken).  */
6447
6448  if (length == 4
6449      && next_real_insn (insn) != 0
6450      && get_attr_length (next_real_insn (insn)) == 4
6451      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6452      && nullify)
6453    useskip = 1;
6454
6455  switch (length)
6456    {
6457
6458      /* All short conditional branches except backwards with an unfilled
6459	 delay slot.  */
6460      case 4:
6461	if (useskip)
6462	  strcpy (buf, "{vextrs,|extrw,s,}");
6463	else
6464	  strcpy (buf, "{bvb,|bb,}");
6465	if (useskip && GET_MODE (operands[0]) == DImode)
6466	  strcpy (buf, "extrd,s,*");
6467	else if (GET_MODE (operands[0]) == DImode)
6468	  strcpy (buf, "bb,*");
6469	if ((which == 0 && negated)
6470	     || (which == 1 && ! negated))
6471	  strcat (buf, ">=");
6472	else
6473	  strcat (buf, "<");
6474	if (useskip)
6475	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6476	else if (nullify && negated)
6477	  strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6478	else if (nullify && ! negated)
6479	  strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6480	else if (! nullify && negated)
6481	  strcat (buf, "{%0,%3|%0,%%sar,%3}");
6482	else if (! nullify && ! negated)
6483	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6484	break;
6485
6486     /* All long conditionals.  Note a short backward branch with an
6487	unfilled delay slot is treated just like a long backward branch
6488	with an unfilled delay slot.  */
6489      case 8:
6490	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
6492	if (dbr_sequence_length () != 0
6493	    && ! forward_branch_p (insn)
6494	    && nullify)
6495	  {
6496	    strcpy (buf, "{bvb,|bb,}");
6497	    if (GET_MODE (operands[0]) == DImode)
6498	      strcat (buf, "*");
6499	    if ((which == 0 && negated)
6500		|| (which == 1 && ! negated))
6501	      strcat (buf, "<");
6502	    else
6503	      strcat (buf, ">=");
6504	    if (negated)
6505	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6506	    else
6507	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6508	  }
6509	/* Handle short backwards branch with an unfilled delay slot.
6510	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6511	   taken and untaken branches.  */
6512	else if (dbr_sequence_length () == 0
6513		 && ! forward_branch_p (insn)
6514		 && INSN_ADDRESSES_SET_P ()
6515		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6516				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6517	  {
6518	    strcpy (buf, "{bvb,|bb,}");
6519	    if (GET_MODE (operands[0]) == DImode)
6520	      strcat (buf, "*");
6521	    if ((which == 0 && negated)
6522		|| (which == 1 && ! negated))
6523	      strcat (buf, ">=");
6524	    else
6525	      strcat (buf, "<");
6526	    if (negated)
6527	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6528	    else
6529	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6530	  }
6531	else
6532	  {
6533	    strcpy (buf, "{vextrs,|extrw,s,}");
6534	    if (GET_MODE (operands[0]) == DImode)
6535	      strcpy (buf, "extrd,s,*");
6536	    if ((which == 0 && negated)
6537		|| (which == 1 && ! negated))
6538	      strcat (buf, "<");
6539	    else
6540	      strcat (buf, ">=");
6541	    if (nullify && negated)
6542	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6543	    else if (nullify && ! negated)
6544	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6545	    else if (negated)
6546	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6547	    else
6548	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6549	  }
6550	break;
6551
6552      default:
6553	gcc_unreachable ();
6554    }
6555  return buf;
6556}
6557
6558/* Return the output template for emitting a dbra type insn.
6559
6560   Note it may perform some output operations on its own before
6561   returning the final output string.  */
6562const char *
6563output_dbra (rtx *operands, rtx insn, int which_alternative)
6564{
6565
6566  /* A conditional branch to the following instruction (e.g. the delay slot) is
6567     asking for a disaster.  Be prepared!  */
6568
6569  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6570    {
6571      if (which_alternative == 0)
6572	return "ldo %1(%0),%0";
6573      else if (which_alternative == 1)
6574	{
6575	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6576	  output_asm_insn ("ldw -16(%%r30),%4", operands);
6577	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6578	  return "{fldws|fldw} -16(%%r30),%0";
6579	}
6580      else
6581	{
6582	  output_asm_insn ("ldw %0,%4", operands);
6583	  return "ldo %1(%4),%4\n\tstw %4,%0";
6584	}
6585    }
6586
6587  if (which_alternative == 0)
6588    {
6589      int nullify = INSN_ANNULLED_BRANCH_P (insn);
6590      int length = get_attr_length (insn);
6591
6592      /* If this is a long branch with its delay slot unfilled, set `nullify'
6593	 as it can nullify the delay slot and save a nop.  */
6594      if (length == 8 && dbr_sequence_length () == 0)
6595	nullify = 1;
6596
6597      /* If this is a short forward conditional branch which did not get
6598	 its delay slot filled, the delay slot can still be nullified.  */
6599      if (! nullify && length == 4 && dbr_sequence_length () == 0)
6600	nullify = forward_branch_p (insn);
6601
6602      switch (length)
6603	{
6604	case 4:
6605	  if (nullify)
6606	    return "addib,%C2,n %1,%0,%3";
6607	  else
6608	    return "addib,%C2 %1,%0,%3";
6609
6610	case 8:
	  /* Handle weird backwards branch with a filled delay slot
6612	     which is nullified.  */
6613	  if (dbr_sequence_length () != 0
6614	      && ! forward_branch_p (insn)
6615	      && nullify)
6616	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
6617	  /* Handle short backwards branch with an unfilled delay slot.
6618	     Using a addb;nop rather than addi;bl saves 1 cycle for both
6619	     taken and untaken branches.  */
6620	  else if (dbr_sequence_length () == 0
6621		   && ! forward_branch_p (insn)
6622		   && INSN_ADDRESSES_SET_P ()
6623		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6624				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6625	      return "addib,%C2 %1,%0,%3%#";
6626
6627	  /* Handle normal cases.  */
6628	  if (nullify)
6629	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
6630	  else
6631	    return "addi,%N2 %1,%0,%0\n\tb %3";
6632
6633	default:
6634	  gcc_unreachable ();
6635	}
6636
6637    }
6638  /* Deal with gross reload from FP register case.  */
6639  else if (which_alternative == 1)
6640    {
6641      /* Move loop counter from FP register to MEM then into a GR,
6642	 increment the GR, store the GR into MEM, and finally reload
6643	 the FP register from MEM from within the branch's delay slot.  */
6644      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6645		       operands);
6646      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6647      if (get_attr_length (insn) == 24)
6648	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6649      else
6650	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6651    }
6652  /* Deal with gross reload from memory case.  */
6653  else
6654    {
6655      /* Reload loop counter from memory, the store back to memory
6656	 happens in the branch's delay slot.  */
6657      output_asm_insn ("ldw %0,%4", operands);
6658      if (get_attr_length (insn) == 12)
6659	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6660      else
6661	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6662    }
6663}
6664
/* Return the output template for emitting a movb type insn.
6666
6667   Note it may perform some output operations on its own before
6668   returning the final output string.  */
6669const char *
6670output_movb (rtx *operands, rtx insn, int which_alternative,
6671	     int reverse_comparison)
6672{
6673
6674  /* A conditional branch to the following instruction (e.g. the delay slot) is
6675     asking for a disaster.  Be prepared!  */
6676
6677  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6678    {
6679      if (which_alternative == 0)
6680	return "copy %1,%0";
6681      else if (which_alternative == 1)
6682	{
6683	  output_asm_insn ("stw %1,-16(%%r30)", operands);
6684	  return "{fldws|fldw} -16(%%r30),%0";
6685	}
6686      else if (which_alternative == 2)
6687	return "stw %1,%0";
6688      else
6689	return "mtsar %r1";
6690    }
6691
6692  /* Support the second variant.  */
6693  if (reverse_comparison)
6694    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6695
6696  if (which_alternative == 0)
6697    {
6698      int nullify = INSN_ANNULLED_BRANCH_P (insn);
6699      int length = get_attr_length (insn);
6700
6701      /* If this is a long branch with its delay slot unfilled, set `nullify'
6702	 as it can nullify the delay slot and save a nop.  */
6703      if (length == 8 && dbr_sequence_length () == 0)
6704	nullify = 1;
6705
6706      /* If this is a short forward conditional branch which did not get
6707	 its delay slot filled, the delay slot can still be nullified.  */
6708      if (! nullify && length == 4 && dbr_sequence_length () == 0)
6709	nullify = forward_branch_p (insn);
6710
6711      switch (length)
6712	{
6713	case 4:
6714	  if (nullify)
6715	    return "movb,%C2,n %1,%0,%3";
6716	  else
6717	    return "movb,%C2 %1,%0,%3";
6718
6719	case 8:
6720	  /* Handle weird backwards branch with a filled delay slot
6721	     which is nullified.  */
6722	  if (dbr_sequence_length () != 0
6723	      && ! forward_branch_p (insn)
6724	      && nullify)
6725	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
6726
6727	  /* Handle short backwards branch with an unfilled delay slot.
6728	     Using a movb;nop rather than or;bl saves 1 cycle for both
6729	     taken and untaken branches.  */
6730	  else if (dbr_sequence_length () == 0
6731		   && ! forward_branch_p (insn)
6732		   && INSN_ADDRESSES_SET_P ()
6733		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6734				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6735	    return "movb,%C2 %1,%0,%3%#";
6736	  /* Handle normal cases.  */
6737	  if (nullify)
6738	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6739	  else
6740	    return "or,%N2 %1,%%r0,%0\n\tb %3";
6741
6742	default:
6743	  gcc_unreachable ();
6744	}
6745    }
6746  /* Deal with gross reload from FP register case.  */
6747  else if (which_alternative == 1)
6748    {
6749      /* Move loop counter from FP register to MEM then into a GR,
6750	 increment the GR, store the GR into MEM, and finally reload
6751	 the FP register from MEM from within the branch's delay slot.  */
6752      output_asm_insn ("stw %1,-16(%%r30)", operands);
6753      if (get_attr_length (insn) == 12)
6754	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6755      else
6756	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6757    }
6758  /* Deal with gross reload from memory case.  */
6759  else if (which_alternative == 2)
6760    {
6761      /* Reload loop counter from memory, the store back to memory
6762	 happens in the branch's delay slot.  */
6763      if (get_attr_length (insn) == 8)
6764	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6765      else
6766	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6767    }
6768  /* Handle SAR as a destination.  */
6769  else
6770    {
6771      if (get_attr_length (insn) == 8)
6772	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6773      else
6774	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
6775    }
6776}
6777
6778/* Copy any FP arguments in INSN into integer registers.  */
6779static void
6780copy_fp_args (rtx insn)
6781{
6782  rtx link;
6783  rtx xoperands[2];
6784
6785  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6786    {
6787      int arg_mode, regno;
6788      rtx use = XEXP (link, 0);
6789
6790      if (! (GET_CODE (use) == USE
6791	  && GET_CODE (XEXP (use, 0)) == REG
6792	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6793	continue;
6794
6795      arg_mode = GET_MODE (XEXP (use, 0));
6796      regno = REGNO (XEXP (use, 0));
6797
6798      /* Is it a floating point register?  */
6799      if (regno >= 32 && regno <= 39)
6800	{
6801	  /* Copy the FP register into an integer register via memory.  */
6802	  if (arg_mode == SFmode)
6803	    {
6804	      xoperands[0] = XEXP (use, 0);
6805	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6806	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6807	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6808	    }
6809	  else
6810	    {
6811	      xoperands[0] = XEXP (use, 0);
6812	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6813	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6814	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6815	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6816	    }
6817	}
6818    }
6819}
6820
6821/* Compute length of the FP argument copy sequence for INSN.  */
6822static int
6823length_fp_args (rtx insn)
6824{
6825  int length = 0;
6826  rtx link;
6827
6828  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6829    {
6830      int arg_mode, regno;
6831      rtx use = XEXP (link, 0);
6832
6833      if (! (GET_CODE (use) == USE
6834	  && GET_CODE (XEXP (use, 0)) == REG
6835	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6836	continue;
6837
6838      arg_mode = GET_MODE (XEXP (use, 0));
6839      regno = REGNO (XEXP (use, 0));
6840
6841      /* Is it a floating point register?  */
6842      if (regno >= 32 && regno <= 39)
6843	{
6844	  if (arg_mode == SFmode)
6845	    length += 8;
6846	  else
6847	    length += 12;
6848	}
6849    }
6850
6851  return length;
6852}
6853
6854/* Return the attribute length for the millicode call instruction INSN.
6855   The length must match the code generated by output_millicode_call.
6856   We include the delay slot in the returned length as it is better to
6857   over estimate the length than to under estimate it.  */
6858
6859int
6860attr_length_millicode_call (rtx insn)
6861{
6862  unsigned long distance = -1;
6863  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6864
6865  if (INSN_ADDRESSES_SET_P ())
6866    {
6867      distance = (total + insn_current_reference_address (insn));
6868      if (distance < total)
6869	distance = -1;
6870    }
6871
6872  if (TARGET_64BIT)
6873    {
6874      if (!TARGET_LONG_CALLS && distance < 7600000)
6875	return 8;
6876
6877      return 20;
6878    }
6879  else if (TARGET_PORTABLE_RUNTIME)
6880    return 24;
6881  else
6882    {
6883      if (!TARGET_LONG_CALLS && distance < 240000)
6884	return 8;
6885
6886      if (TARGET_LONG_ABS_CALL && !flag_pic)
6887	return 12;
6888
6889      return 24;
6890    }
6891}
6892
6893/* INSN is a function call.  It may have an unconditional jump
6894   in its delay slot.
6895
6896   CALL_DEST is the routine we are calling.  */
6897
6898const char *
6899output_millicode_call (rtx insn, rtx call_dest)
6900{
6901  int attr_length = get_attr_length (insn);
6902  int seq_length = dbr_sequence_length ();
6903  int distance;
6904  rtx seq_insn;
6905  rtx xoperands[3];
6906
6907  xoperands[0] = call_dest;
6908  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6909
6910  /* Handle the common case where we are sure that the branch will
6911     reach the beginning of the $CODE$ subspace.  The within reach
6912     form of the $$sh_func_adrs call has a length of 28.  Because
6913     it has an attribute type of multi, it never has a nonzero
6914     sequence length.  The length of the $$sh_func_adrs is the same
6915     as certain out of reach PIC calls to other routines.  */
6916  if (!TARGET_LONG_CALLS
6917      && ((seq_length == 0
6918	   && (attr_length == 12
6919	       || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6920	  || (seq_length != 0 && attr_length == 8)))
6921    {
6922      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6923    }
6924  else
6925    {
6926      if (TARGET_64BIT)
6927	{
6928	  /* It might seem that one insn could be saved by accessing
6929	     the millicode function using the linkage table.  However,
6930	     this doesn't work in shared libraries and other dynamically
6931	     loaded objects.  Using a pc-relative sequence also avoids
6932	     problems related to the implicit use of the gp register.  */
6933	  output_asm_insn ("b,l .+8,%%r1", xoperands);
6934
6935	  if (TARGET_GAS)
6936	    {
6937	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6938	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6939	    }
6940	  else
6941	    {
6942	      xoperands[1] = gen_label_rtx ();
6943	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6944	      (*targetm.asm_out.internal_label) (asm_out_file, "L",
6945					 CODE_LABEL_NUMBER (xoperands[1]));
6946	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6947	    }
6948
6949	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6950	}
6951      else if (TARGET_PORTABLE_RUNTIME)
6952	{
6953	  /* Pure portable runtime doesn't allow be/ble; we also don't
6954	     have PIC support in the assembler/linker, so this sequence
6955	     is needed.  */
6956
6957	  /* Get the address of our target into %r1.  */
6958	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
6959	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6960
6961	  /* Get our return address into %r31.  */
6962	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6963	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6964
6965	  /* Jump to our target address in %r1.  */
6966	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
6967	}
6968      else if (!flag_pic)
6969	{
6970	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
6971	  if (TARGET_PA_20)
6972	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6973	  else
6974	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6975	}
6976      else
6977	{
6978	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6979	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6980
6981	  if (TARGET_SOM || !TARGET_GAS)
6982	    {
6983	      /* The HP assembler can generate relocations for the
6984		 difference of two symbols.  GAS can do this for a
6985		 millicode symbol but not an arbitrary external
6986		 symbol when generating SOM output.  */
6987	      xoperands[1] = gen_label_rtx ();
6988	      (*targetm.asm_out.internal_label) (asm_out_file, "L",
6989					 CODE_LABEL_NUMBER (xoperands[1]));
6990	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6991	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6992	    }
6993	  else
6994	    {
6995	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6996	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6997			       xoperands);
6998	    }
6999
7000	  /* Jump to our target address in %r1.  */
7001	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7002	}
7003    }
7004
7005  if (seq_length == 0)
7006    output_asm_insn ("nop", xoperands);
7007
7008  /* We are done if there isn't a jump in the delay slot.  */
7009  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7010    return "";
7011
7012  /* This call has an unconditional jump in its delay slot.  */
7013  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7014
7015  /* See if the return address can be adjusted.  Use the containing
7016     sequence insn's address.  */
7017  if (INSN_ADDRESSES_SET_P ())
7018    {
7019      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7020      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7021		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7022
7023      if (VAL_14_BITS_P (distance))
7024	{
7025	  xoperands[1] = gen_label_rtx ();
7026	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7027	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
7028					     CODE_LABEL_NUMBER (xoperands[1]));
7029	}
7030      else
7031	/* ??? This branch may not reach its target.  */
7032	output_asm_insn ("nop\n\tb,n %0", xoperands);
7033    }
7034  else
7035    /* ??? This branch may not reach its target.  */
7036    output_asm_insn ("nop\n\tb,n %0", xoperands);
7037
7038  /* Delete the jump.  */
7039  PUT_CODE (NEXT_INSN (insn), NOTE);
7040  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7041  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7042
7043  return "";
7044}
7045
7046/* Return the attribute length of the call instruction INSN.  The SIBCALL
7047   flag indicates whether INSN is a regular call or a sibling call.  The
7048   length returned must be longer than the code actually generated by
7049   output_call.  Since branch shortening is done before delay branch
7050   sequencing, there is no way to determine whether or not the delay
7051   slot will be filled during branch shortening.  Even when the delay
7052   slot is filled, we may have to add a nop if the delay slot contains
7053   a branch that can't reach its target.  Thus, we always have to include
7054   the delay slot in the length estimate.  This used to be done in
7055   pa_adjust_insn_length but we do it here now as some sequences always
7056   fill the delay slot and we can save four bytes in the estimate for
7057   these sequences.  */
7058
7059int
7060attr_length_call (rtx insn, int sibcall)
7061{
7062  int local_call;
7063  rtx call_dest;
7064  tree call_decl;
7065  int length = 0;
7066  rtx pat = PATTERN (insn);
7067  unsigned long distance = -1;
7068
7069  if (INSN_ADDRESSES_SET_P ())
7070    {
7071      unsigned long total;
7072
7073      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7074      distance = (total + insn_current_reference_address (insn));
7075      if (distance < total)
7076	distance = -1;
7077    }
7078
7079  /* Determine if this is a local call.  */
7080  if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7081    call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7082  else
7083    call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7084
7085  call_decl = SYMBOL_REF_DECL (call_dest);
7086  local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7087
7088  /* pc-relative branch.  */
7089  if (!TARGET_LONG_CALLS
7090      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7091	  || distance < 240000))
7092    length += 8;
7093
7094  /* 64-bit plabel sequence.  */
7095  else if (TARGET_64BIT && !local_call)
7096    length += sibcall ? 28 : 24;
7097
7098  /* non-pic long absolute branch sequence.  */
7099  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7100    length += 12;
7101
7102  /* long pc-relative branch sequence.  */
7103  else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7104	   || (TARGET_64BIT && !TARGET_GAS)
7105	   || (TARGET_GAS && !TARGET_SOM
7106	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7107    {
7108      length += 20;
7109
7110      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7111	length += 8;
7112    }
7113
7114  /* 32-bit plabel sequence.  */
7115  else
7116    {
7117      length += 32;
7118
7119      if (TARGET_SOM)
7120	length += length_fp_args (insn);
7121
7122      if (flag_pic)
7123	length += 4;
7124
7125      if (!TARGET_PA_20)
7126	{
7127	  if (!sibcall)
7128	    length += 8;
7129
7130	  if (!TARGET_NO_SPACE_REGS)
7131	    length += 8;
7132	}
7133    }
7134
7135  return length;
7136}
7137
7138/* INSN is a function call.  It may have an unconditional jump
7139   in its delay slot.
7140
7141   CALL_DEST is the routine we are calling.  */
7142
7143const char *
7144output_call (rtx insn, rtx call_dest, int sibcall)
7145{
7146  int delay_insn_deleted = 0;
7147  int delay_slot_filled = 0;
7148  int seq_length = dbr_sequence_length ();
7149  tree call_decl = SYMBOL_REF_DECL (call_dest);
7150  int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7151  rtx xoperands[2];
7152
7153  xoperands[0] = call_dest;
7154
7155  /* Handle the common case where we're sure that the branch will reach
7156     the beginning of the "$CODE$" subspace.  This is the beginning of
7157     the current function if we are in a named section.  */
7158  if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7159    {
7160      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7161      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7162    }
7163  else
7164    {
7165      if (TARGET_64BIT && !local_call)
7166	{
7167	  /* ??? As far as I can tell, the HP linker doesn't support the
7168	     long pc-relative sequence described in the 64-bit runtime
7169	     architecture.  So, we use a slightly longer indirect call.  */
7170	  xoperands[0] = get_deferred_plabel (call_dest);
7171	  xoperands[1] = gen_label_rtx ();
7172
7173	  /* If this isn't a sibcall, we put the load of %r27 into the
7174	     delay slot.  We can't do this in a sibcall as we don't
7175	     have a second call-clobbered scratch register available.  */
7176	  if (seq_length != 0
7177	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7178	      && !sibcall)
7179	    {
7180	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7181			       optimize, 0, NULL);
7182
7183	      /* Now delete the delay insn.  */
7184	      PUT_CODE (NEXT_INSN (insn), NOTE);
7185	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7186	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7187	      delay_insn_deleted = 1;
7188	    }
7189
7190	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7191	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7192	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7193
7194	  if (sibcall)
7195	    {
7196	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7197	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7198	      output_asm_insn ("bve (%%r1)", xoperands);
7199	    }
7200	  else
7201	    {
7202	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7203	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7204	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7205	      delay_slot_filled = 1;
7206	    }
7207	}
7208      else
7209	{
7210	  int indirect_call = 0;
7211
7212	  /* Emit a long call.  There are several different sequences
7213	     of increasing length and complexity.  In most cases,
7214             they don't allow an instruction in the delay slot.  */
7215	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7216	      && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7217	      && !(TARGET_GAS && !TARGET_SOM
7218		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7219	      && !TARGET_64BIT)
7220	    indirect_call = 1;
7221
7222	  if (seq_length != 0
7223	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7224	      && !sibcall
7225	      && (!TARGET_PA_20 || indirect_call))
7226	    {
7227	      /* A non-jump insn in the delay slot.  By definition we can
7228		 emit this insn before the call (and in fact before argument
7229		 relocating.  */
7230	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7231			       NULL);
7232
7233	      /* Now delete the delay insn.  */
7234	      PUT_CODE (NEXT_INSN (insn), NOTE);
7235	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7236	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7237	      delay_insn_deleted = 1;
7238	    }
7239
7240	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7241	    {
7242	      /* This is the best sequence for making long calls in
7243		 non-pic code.  Unfortunately, GNU ld doesn't provide
7244		 the stub needed for external calls, and GAS's support
7245		 for this with the SOM linker is buggy.  It is safe
7246		 to use this for local calls.  */
7247	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7248	      if (sibcall)
7249		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7250	      else
7251		{
7252		  if (TARGET_PA_20)
7253		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7254				     xoperands);
7255		  else
7256		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7257
7258		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7259		  delay_slot_filled = 1;
7260		}
7261	    }
7262	  else
7263	    {
7264	      if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7265		  || (TARGET_64BIT && !TARGET_GAS))
7266		{
7267		  /* The HP assembler and linker can handle relocations
7268		     for the difference of two symbols.  GAS and the HP
7269		     linker can't do this when one of the symbols is
7270		     external.  */
7271		  xoperands[1] = gen_label_rtx ();
7272		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7273		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7274		  (*targetm.asm_out.internal_label) (asm_out_file, "L",
7275					     CODE_LABEL_NUMBER (xoperands[1]));
7276		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7277		}
7278	      else if (TARGET_GAS && !TARGET_SOM
7279		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7280		{
7281		  /*  GAS currently can't generate the relocations that
7282		      are needed for the SOM linker under HP-UX using this
7283		      sequence.  The GNU linker doesn't generate the stubs
7284		      that are needed for external calls on TARGET_ELF32
7285		      with this sequence.  For now, we have to use a
7286		      longer plabel sequence when using GAS.  */
7287		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7288		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7289				   xoperands);
7290		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7291				   xoperands);
7292		}
7293	      else
7294		{
7295		  /* Emit a long plabel-based call sequence.  This is
7296		     essentially an inline implementation of $$dyncall.
7297		     We don't actually try to call $$dyncall as this is
7298		     as difficult as calling the function itself.  */
7299		  xoperands[0] = get_deferred_plabel (call_dest);
7300		  xoperands[1] = gen_label_rtx ();
7301
7302		  /* Since the call is indirect, FP arguments in registers
7303		     need to be copied to the general registers.  Then, the
7304		     argument relocation stub will copy them back.  */
7305		  if (TARGET_SOM)
7306		    copy_fp_args (insn);
7307
7308		  if (flag_pic)
7309		    {
7310		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
7311		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7312		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7313		    }
7314		  else
7315		    {
7316		      output_asm_insn ("addil LR'%0-$global$,%%r27",
7317				       xoperands);
7318		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7319				       xoperands);
7320		    }
7321
7322		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7323		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7324		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7325		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7326
7327		  if (!sibcall && !TARGET_PA_20)
7328		    {
7329		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7330		      if (TARGET_NO_SPACE_REGS)
7331			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7332		      else
7333			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7334		    }
7335		}
7336
7337	      if (TARGET_PA_20)
7338		{
7339		  if (sibcall)
7340		    output_asm_insn ("bve (%%r1)", xoperands);
7341		  else
7342		    {
7343		      if (indirect_call)
7344			{
7345			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7346			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7347			  delay_slot_filled = 1;
7348			}
7349		      else
7350			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7351		    }
7352		}
7353	      else
7354		{
7355		  if (!TARGET_NO_SPACE_REGS)
7356		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7357				     xoperands);
7358
7359		  if (sibcall)
7360		    {
7361		      if (TARGET_NO_SPACE_REGS)
7362			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7363		      else
7364			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7365		    }
7366		  else
7367		    {
7368		      if (TARGET_NO_SPACE_REGS)
7369			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7370		      else
7371			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7372
7373		      if (indirect_call)
7374			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7375		      else
7376			output_asm_insn ("copy %%r31,%%r2", xoperands);
7377		      delay_slot_filled = 1;
7378		    }
7379		}
7380	    }
7381	}
7382    }
7383
7384  if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7385    output_asm_insn ("nop", xoperands);
7386
7387  /* We are done if there isn't a jump in the delay slot.  */
7388  if (seq_length == 0
7389      || delay_insn_deleted
7390      || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7391    return "";
7392
7393  /* A sibcall should never have a branch in the delay slot.  */
7394  gcc_assert (!sibcall);
7395
7396  /* This call has an unconditional jump in its delay slot.  */
7397  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7398
7399  if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7400    {
7401      /* See if the return address can be adjusted.  Use the containing
7402         sequence insn's address.  */
7403      rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7404      int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7405		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7406
7407      if (VAL_14_BITS_P (distance))
7408	{
7409	  xoperands[1] = gen_label_rtx ();
7410	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7411	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
7412					     CODE_LABEL_NUMBER (xoperands[1]));
7413	}
7414      else
7415	output_asm_insn ("nop\n\tb,n %0", xoperands);
7416    }
7417  else
7418    output_asm_insn ("b,n %0", xoperands);
7419
7420  /* Delete the jump.  */
7421  PUT_CODE (NEXT_INSN (insn), NOTE);
7422  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7423  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7424
7425  return "";
7426}
7427
7428/* Return the attribute length of the indirect call instruction INSN.
7429   The length must match the code generated by output_indirect call.
7430   The returned length includes the delay slot.  Currently, the delay
7431   slot of an indirect call sequence is not exposed and it is used by
7432   the sequence itself.  */
7433
7434int
7435attr_length_indirect_call (rtx insn)
7436{
7437  unsigned long distance = -1;
7438  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7439
7440  if (INSN_ADDRESSES_SET_P ())
7441    {
7442      distance = (total + insn_current_reference_address (insn));
7443      if (distance < total)
7444	distance = -1;
7445    }
7446
7447  if (TARGET_64BIT)
7448    return 12;
7449
7450  if (TARGET_FAST_INDIRECT_CALLS
7451      || (!TARGET_PORTABLE_RUNTIME
7452	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7453	      || distance < 240000)))
7454    return 8;
7455
7456  if (flag_pic)
7457    return 24;
7458
7459  if (TARGET_PORTABLE_RUNTIME)
7460    return 20;
7461
7462  /* Out of reach, can use ble.  */
7463  return 12;
7464}
7465
7466const char *
7467output_indirect_call (rtx insn, rtx call_dest)
7468{
7469  rtx xoperands[1];
7470
7471  if (TARGET_64BIT)
7472    {
7473      xoperands[0] = call_dest;
7474      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7475      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7476      return "";
7477    }
7478
7479  /* First the special case for kernels, level 0 systems, etc.  */
7480  if (TARGET_FAST_INDIRECT_CALLS)
7481    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7482
7483  /* Now the normal case -- we can reach $$dyncall directly or
7484     we're sure that we can get there via a long-branch stub.
7485
7486     No need to check target flags as the length uniquely identifies
7487     the remaining cases.  */
7488  if (attr_length_indirect_call (insn) == 8)
7489    {
7490      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7491	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
7492	 variant of the B,L instruction can't be used on the SOM target.  */
7493      if (TARGET_PA_20 && !TARGET_SOM)
7494	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7495      else
7496	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7497    }
7498
7499  /* Long millicode call, but we are not generating PIC or portable runtime
7500     code.  */
7501  if (attr_length_indirect_call (insn) == 12)
7502    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7503
7504  /* Long millicode call for portable runtime.  */
7505  if (attr_length_indirect_call (insn) == 20)
7506    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7507
7508  /* We need a long PIC call to $$dyncall.  */
7509  xoperands[0] = NULL_RTX;
7510  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7511  if (TARGET_SOM || !TARGET_GAS)
7512    {
7513      xoperands[0] = gen_label_rtx ();
7514      output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7515      (*targetm.asm_out.internal_label) (asm_out_file, "L",
7516					 CODE_LABEL_NUMBER (xoperands[0]));
7517      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7518    }
7519  else
7520    {
7521      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7522      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7523		       xoperands);
7524    }
7525  output_asm_insn ("blr %%r0,%%r2", xoperands);
7526  output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7527  return "";
7528}
7529
7530/* Return the total length of the save and restore instructions needed for
7531   the data linkage table pointer (i.e., the PIC register) across the call
7532   instruction INSN.  No-return calls do not require a save and restore.
7533   In addition, we may be able to avoid the save and restore for calls
7534   within the same translation unit.  */
7535
7536int
7537attr_length_save_restore_dltp (rtx insn)
7538{
7539  if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7540    return 0;
7541
7542  return 8;
7543}
7544
7545/* In HPUX 8.0's shared library scheme, special relocations are needed
7546   for function labels if they might be passed to a function
7547   in a shared library (because shared libraries don't live in code
7548   space), and special magic is needed to construct their address.  */
7549
7550void
7551hppa_encode_label (rtx sym)
7552{
7553  const char *str = XSTR (sym, 0);
7554  int len = strlen (str) + 1;
7555  char *newstr, *p;
7556
7557  p = newstr = alloca (len + 1);
7558  *p++ = '@';
7559  strcpy (p, str);
7560
7561  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7562}
7563
7564static void
7565pa_encode_section_info (tree decl, rtx rtl, int first)
7566{
7567  int old_referenced = 0;
7568
7569  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7570    old_referenced
7571      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7572
7573  default_encode_section_info (decl, rtl, first);
7574
7575  if (first && TEXT_SPACE_P (decl))
7576    {
7577      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7578      if (TREE_CODE (decl) == FUNCTION_DECL)
7579	hppa_encode_label (XEXP (rtl, 0));
7580    }
7581  else if (old_referenced)
7582    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
7583}
7584
7585/* This is sort of inverse to pa_encode_section_info.  */
7586
7587static const char *
7588pa_strip_name_encoding (const char *str)
7589{
7590  str += (*str == '@');
7591  str += (*str == '*');
7592  return str;
7593}
7594
7595int
7596function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7597{
7598  return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7599}
7600
7601/* Returns 1 if OP is a function label involved in a simple addition
7602   with a constant.  Used to keep certain patterns from matching
7603   during instruction combination.  */
7604int
7605is_function_label_plus_const (rtx op)
7606{
7607  /* Strip off any CONST.  */
7608  if (GET_CODE (op) == CONST)
7609    op = XEXP (op, 0);
7610
7611  return (GET_CODE (op) == PLUS
7612	  && function_label_operand (XEXP (op, 0), Pmode)
7613	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
7614}
7615
7616/* Output assembly code for a thunk to FUNCTION.  */
7617
7618static void
7619pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7620			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7621			tree function)
7622{
7623  static unsigned int current_thunk_number;
7624  int val_14 = VAL_14_BITS_P (delta);
7625  int nbytes = 0;
7626  char label[16];
7627  rtx xoperands[4];
7628
7629  xoperands[0] = XEXP (DECL_RTL (function), 0);
7630  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7631  xoperands[2] = GEN_INT (delta);
7632
7633  ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7634  fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7635
7636  /* Output the thunk.  We know that the function is in the same
7637     translation unit (i.e., the same space) as the thunk, and that
7638     thunks are output after their method.  Thus, we don't need an
7639     external branch to reach the function.  With SOM and GAS,
7640     functions and thunks are effectively in different sections.
7641     Thus, we can always use a IA-relative branch and the linker
7642     will add a long branch stub if necessary.
7643
7644     However, we have to be careful when generating PIC code on the
7645     SOM port to ensure that the sequence does not transfer to an
7646     import stub for the target function as this could clobber the
7647     return value saved at SP-24.  This would also apply to the
7648     32-bit linux port if the multi-space model is implemented.  */
7649  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7650       && !(flag_pic && TREE_PUBLIC (function))
7651       && (TARGET_GAS || last_address < 262132))
7652      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7653	  && ((targetm.have_named_sections
7654	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
7655	       /* The GNU 64-bit linker has rather poor stub management.
7656		  So, we use a long branch from thunks that aren't in
7657		  the same section as the target function.  */
7658	       && ((!TARGET_64BIT
7659		    && (DECL_SECTION_NAME (thunk_fndecl)
7660			!= DECL_SECTION_NAME (function)))
7661		   || ((DECL_SECTION_NAME (thunk_fndecl)
7662			== DECL_SECTION_NAME (function))
7663		       && last_address < 262132)))
7664	      || (!targetm.have_named_sections && last_address < 262132))))
7665    {
7666      if (!val_14)
7667	output_asm_insn ("addil L'%2,%%r26", xoperands);
7668
7669      output_asm_insn ("b %0", xoperands);
7670
7671      if (val_14)
7672	{
7673	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7674	  nbytes += 8;
7675	}
7676      else
7677	{
7678	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7679	  nbytes += 12;
7680	}
7681    }
7682  else if (TARGET_64BIT)
7683    {
7684      /* We only have one call-clobbered scratch register, so we can't
7685         make use of the delay slot if delta doesn't fit in 14 bits.  */
7686      if (!val_14)
7687	{
7688	  output_asm_insn ("addil L'%2,%%r26", xoperands);
7689	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7690	}
7691
7692      output_asm_insn ("b,l .+8,%%r1", xoperands);
7693
7694      if (TARGET_GAS)
7695	{
7696	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7697	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7698	}
7699      else
7700	{
7701	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7702	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7703	}
7704
7705      if (val_14)
7706	{
7707	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7708	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7709	  nbytes += 20;
7710	}
7711      else
7712	{
7713	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7714	  nbytes += 24;
7715	}
7716    }
7717  else if (TARGET_PORTABLE_RUNTIME)
7718    {
7719      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7720      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7721
7722      if (!val_14)
7723	output_asm_insn ("addil L'%2,%%r26", xoperands);
7724
7725      output_asm_insn ("bv %%r0(%%r22)", xoperands);
7726
7727      if (val_14)
7728	{
7729	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7730	  nbytes += 16;
7731	}
7732      else
7733	{
7734	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7735	  nbytes += 20;
7736	}
7737    }
7738  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7739    {
7740      /* The function is accessible from outside this module.  The only
7741	 way to avoid an import stub between the thunk and function is to
7742	 call the function directly with an indirect sequence similar to
7743	 that used by $$dyncall.  This is possible because $$dyncall acts
7744	 as the import stub in an indirect call.  */
7745      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7746      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
7747      output_asm_insn ("addil LT'%3,%%r19", xoperands);
7748      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
7749      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7750      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
7751      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
7752      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
7753      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7754
7755      if (!val_14)
7756	{
7757	  output_asm_insn ("addil L'%2,%%r26", xoperands);
7758	  nbytes += 4;
7759	}
7760
7761      if (TARGET_PA_20)
7762	{
7763	  output_asm_insn ("bve (%%r22)", xoperands);
7764	  nbytes += 36;
7765	}
7766      else if (TARGET_NO_SPACE_REGS)
7767	{
7768	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
7769	  nbytes += 36;
7770	}
7771      else
7772	{
7773	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
7774	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
7775	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
7776	  nbytes += 44;
7777	}
7778
7779      if (val_14)
7780	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7781      else
7782	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7783    }
7784  else if (flag_pic)
7785    {
7786      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7787
7788      if (TARGET_SOM || !TARGET_GAS)
7789	{
7790	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
7791	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
7792	}
7793      else
7794	{
7795	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7796	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
7797	}
7798
7799      if (!val_14)
7800	output_asm_insn ("addil L'%2,%%r26", xoperands);
7801
7802      output_asm_insn ("bv %%r0(%%r22)", xoperands);
7803
7804      if (val_14)
7805	{
7806	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7807	  nbytes += 20;
7808	}
7809      else
7810	{
7811	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7812	  nbytes += 24;
7813	}
7814    }
7815  else
7816    {
7817      if (!val_14)
7818	output_asm_insn ("addil L'%2,%%r26", xoperands);
7819
7820      output_asm_insn ("ldil L'%0,%%r22", xoperands);
7821      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
7822
7823      if (val_14)
7824	{
7825	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7826	  nbytes += 12;
7827	}
7828      else
7829	{
7830	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7831	  nbytes += 16;
7832	}
7833    }
7834
7835  fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7836
7837  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7838    {
7839      data_section ();
7840      output_asm_insn (".align 4", xoperands);
7841      ASM_OUTPUT_LABEL (file, label);
7842      output_asm_insn (".word P'%0", xoperands);
7843    }
7844  else if (TARGET_SOM && TARGET_GAS)
7845    forget_section ();
7846
7847  current_thunk_number++;
7848  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7849	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7850  last_address += nbytes;
7851  update_total_code_bytes (nbytes);
7852}
7853
7854/* Only direct calls to static functions are allowed to be sibling (tail)
7855   call optimized.
7856
7857   This restriction is necessary because some linker generated stubs will
7858   store return pointers into rp' in some cases which might clobber a
7859   live value already in rp'.
7860
7861   In a sibcall the current function and the target function share stack
7862   space.  Thus if the path to the current function and the path to the
7863   target function save a value in rp', they save the value into the
7864   same stack slot, which has undesirable consequences.
7865
7866   Because of the deferred binding nature of shared libraries any function
7867   with external scope could be in a different load module and thus require
7868   rp' to be saved when calling that function.  So sibcall optimizations
7869   can only be safe for static function.
7870
7871   Note that GCC never needs return value relocations, so we don't have to
7872   worry about static calls with return value relocations (which require
7873   saving rp').
7874
7875   It is safe to perform a sibcall optimization when the target function
7876   will never return.  */
7877static bool
7878pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7879{
7880  if (TARGET_PORTABLE_RUNTIME)
7881    return false;
7882
7883  /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7884     single subspace mode and the call is not indirect.  As far as I know,
7885     there is no operating system support for the multiple subspace mode.
7886     It might be possible to support indirect calls if we didn't use
7887     $$dyncall (see the indirect sequence generated in output_call).  */
7888  if (TARGET_ELF32)
7889    return (decl != NULL_TREE);
7890
7891  /* Sibcalls are not ok because the arg pointer register is not a fixed
7892     register.  This prevents the sibcall optimization from occurring.  In
7893     addition, there are problems with stub placement using GNU ld.  This
7894     is because a normal sibcall branch uses a 17-bit relocation while
7895     a regular call branch uses a 22-bit relocation.  As a result, more
7896     care needs to be taken in the placement of long-branch stubs.  */
7897  if (TARGET_64BIT)
7898    return false;
7899
7900  /* Sibcalls are only ok within a translation unit.  */
7901  return (decl && !TREE_PUBLIC (decl));
7902}
7903
7904/* ??? Addition is not commutative on the PA due to the weird implicit
7905   space register selection rules for memory addresses.  Therefore, we
7906   don't consider a + b == b + a, as this might be inside a MEM.  */
7907static bool
7908pa_commutative_p (rtx x, int outer_code)
7909{
7910  return (COMMUTATIVE_P (x)
7911	  && (TARGET_NO_SPACE_REGS
7912	      || (outer_code != UNKNOWN && outer_code != MEM)
7913	      || GET_CODE (x) != PLUS));
7914}
7915
7916/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7917   use in fmpyadd instructions.  */
7918int
7919fmpyaddoperands (rtx *operands)
7920{
7921  enum machine_mode mode = GET_MODE (operands[0]);
7922
7923  /* Must be a floating point mode.  */
7924  if (mode != SFmode && mode != DFmode)
7925    return 0;
7926
7927  /* All modes must be the same.  */
7928  if (! (mode == GET_MODE (operands[1])
7929	 && mode == GET_MODE (operands[2])
7930	 && mode == GET_MODE (operands[3])
7931	 && mode == GET_MODE (operands[4])
7932	 && mode == GET_MODE (operands[5])))
7933    return 0;
7934
7935  /* All operands must be registers.  */
7936  if (! (GET_CODE (operands[1]) == REG
7937	 && GET_CODE (operands[2]) == REG
7938	 && GET_CODE (operands[3]) == REG
7939	 && GET_CODE (operands[4]) == REG
7940	 && GET_CODE (operands[5]) == REG))
7941    return 0;
7942
7943  /* Only 2 real operands to the addition.  One of the input operands must
7944     be the same as the output operand.  */
7945  if (! rtx_equal_p (operands[3], operands[4])
7946      && ! rtx_equal_p (operands[3], operands[5]))
7947    return 0;
7948
7949  /* Inout operand of add cannot conflict with any operands from multiply.  */
7950  if (rtx_equal_p (operands[3], operands[0])
7951     || rtx_equal_p (operands[3], operands[1])
7952     || rtx_equal_p (operands[3], operands[2]))
7953    return 0;
7954
7955  /* multiply cannot feed into addition operands.  */
7956  if (rtx_equal_p (operands[4], operands[0])
7957      || rtx_equal_p (operands[5], operands[0]))
7958    return 0;
7959
7960  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
7961  if (mode == SFmode
7962      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7963	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7964	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7965	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7966	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7967	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7968    return 0;
7969
7970  /* Passed.  Operands are suitable for fmpyadd.  */
7971  return 1;
7972}
7973
7974#if !defined(USE_COLLECT2)
7975static void
7976pa_asm_out_constructor (rtx symbol, int priority)
7977{
7978  if (!function_label_operand (symbol, VOIDmode))
7979    hppa_encode_label (symbol);
7980
7981#ifdef CTORS_SECTION_ASM_OP
7982  default_ctor_section_asm_out_constructor (symbol, priority);
7983#else
7984# ifdef TARGET_ASM_NAMED_SECTION
7985  default_named_section_asm_out_constructor (symbol, priority);
7986# else
7987  default_stabs_asm_out_constructor (symbol, priority);
7988# endif
7989#endif
7990}
7991
7992static void
7993pa_asm_out_destructor (rtx symbol, int priority)
7994{
7995  if (!function_label_operand (symbol, VOIDmode))
7996    hppa_encode_label (symbol);
7997
7998#ifdef DTORS_SECTION_ASM_OP
7999  default_dtor_section_asm_out_destructor (symbol, priority);
8000#else
8001# ifdef TARGET_ASM_NAMED_SECTION
8002  default_named_section_asm_out_destructor (symbol, priority);
8003# else
8004  default_stabs_asm_out_destructor (symbol, priority);
8005# endif
8006#endif
8007}
8008#endif
8009
8010/* This function places uninitialized global data in the bss section.
8011   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8012   function on the SOM port to prevent uninitialized global data from
8013   being placed in the data section.  */
8014
8015void
8016pa_asm_output_aligned_bss (FILE *stream,
8017			   const char *name,
8018			   unsigned HOST_WIDE_INT size,
8019			   unsigned int align)
8020{
8021  bss_section ();
8022  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8023
8024#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8025  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8026#endif
8027
8028#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8029  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8030#endif
8031
8032  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8033  ASM_OUTPUT_LABEL (stream, name);
8034  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8035}
8036
8037/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8038   that doesn't allow the alignment of global common storage to be directly
8039   specified.  The SOM linker aligns common storage based on the rounded
8040   value of the NUM_BYTES parameter in the .comm directive.  It's not
8041   possible to use the .align directive as it doesn't affect the alignment
8042   of the label associated with a .comm directive.  */
8043
8044void
8045pa_asm_output_aligned_common (FILE *stream,
8046			      const char *name,
8047			      unsigned HOST_WIDE_INT size,
8048			      unsigned int align)
8049{
8050  unsigned int max_common_align;
8051
8052  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8053  if (align > max_common_align)
8054    {
8055      warning (0, "alignment (%u) for %s exceeds maximum alignment "
8056	       "for global common data.  Using %u",
8057	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8058      align = max_common_align;
8059    }
8060
8061  bss_section ();
8062
8063  assemble_name (stream, name);
8064  fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8065           MAX (size, align / BITS_PER_UNIT));
8066}
8067
8068/* We can't use .comm for local common storage as the SOM linker effectively
8069   treats the symbol as universal and uses the same storage for local symbols
8070   with the same name in different object files.  The .block directive
8071   reserves an uninitialized block of storage.  However, it's not common
8072   storage.  Fortunately, GCC never requests common storage with the same
8073   name in any given translation unit.  */
8074
8075void
8076pa_asm_output_aligned_local (FILE *stream,
8077			     const char *name,
8078			     unsigned HOST_WIDE_INT size,
8079			     unsigned int align)
8080{
8081  bss_section ();
8082  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8083
8084#ifdef LOCAL_ASM_OP
8085  fprintf (stream, "%s", LOCAL_ASM_OP);
8086  assemble_name (stream, name);
8087  fprintf (stream, "\n");
8088#endif
8089
8090  ASM_OUTPUT_LABEL (stream, name);
8091  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8092}
8093
8094/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8095   use in fmpysub instructions.  */
8096int
8097fmpysuboperands (rtx *operands)
8098{
8099  enum machine_mode mode = GET_MODE (operands[0]);
8100
8101  /* Must be a floating point mode.  */
8102  if (mode != SFmode && mode != DFmode)
8103    return 0;
8104
8105  /* All modes must be the same.  */
8106  if (! (mode == GET_MODE (operands[1])
8107	 && mode == GET_MODE (operands[2])
8108	 && mode == GET_MODE (operands[3])
8109	 && mode == GET_MODE (operands[4])
8110	 && mode == GET_MODE (operands[5])))
8111    return 0;
8112
8113  /* All operands must be registers.  */
8114  if (! (GET_CODE (operands[1]) == REG
8115	 && GET_CODE (operands[2]) == REG
8116	 && GET_CODE (operands[3]) == REG
8117	 && GET_CODE (operands[4]) == REG
8118	 && GET_CODE (operands[5]) == REG))
8119    return 0;
8120
8121  /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8122     operation, so operands[4] must be the same as operand[3].  */
8123  if (! rtx_equal_p (operands[3], operands[4]))
8124    return 0;
8125
8126  /* multiply cannot feed into subtraction.  */
8127  if (rtx_equal_p (operands[5], operands[0]))
8128    return 0;
8129
8130  /* Inout operand of sub cannot conflict with any operands from multiply.  */
8131  if (rtx_equal_p (operands[3], operands[0])
8132     || rtx_equal_p (operands[3], operands[1])
8133     || rtx_equal_p (operands[3], operands[2]))
8134    return 0;
8135
8136  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8137  if (mode == SFmode
8138      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8139	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8140	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8141	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8142	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8143	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8144    return 0;
8145
8146  /* Passed.  Operands are suitable for fmpysub.  */
8147  return 1;
8148}
8149
8150/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8151   constants for shadd instructions.  */
8152int
8153shadd_constant_p (int val)
8154{
8155  if (val == 2 || val == 4 || val == 8)
8156    return 1;
8157  else
8158    return 0;
8159}
8160
8161/* Return 1 if OP is valid as a base or index register in a
8162   REG+REG address.  */
8163
8164int
8165borx_reg_operand (rtx op, enum machine_mode mode)
8166{
8167  if (GET_CODE (op) != REG)
8168    return 0;
8169
8170  /* We must reject virtual registers as the only expressions that
8171     can be instantiated are REG and REG+CONST.  */
8172  if (op == virtual_incoming_args_rtx
8173      || op == virtual_stack_vars_rtx
8174      || op == virtual_stack_dynamic_rtx
8175      || op == virtual_outgoing_args_rtx
8176      || op == virtual_cfa_rtx)
8177    return 0;
8178
8179  /* While it's always safe to index off the frame pointer, it's not
8180     profitable to do so when the frame pointer is being eliminated.  */
8181  if (!reload_completed
8182      && flag_omit_frame_pointer
8183      && !current_function_calls_alloca
8184      && op == frame_pointer_rtx)
8185    return 0;
8186
8187  return register_operand (op, mode);
8188}
8189
8190/* Return 1 if this operand is anything other than a hard register.  */
8191
8192int
8193non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8194{
8195  return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8196}
8197
8198/* Return 1 if INSN branches forward.  Should be using insn_addresses
8199   to avoid walking through all the insns...  */
8200static int
8201forward_branch_p (rtx insn)
8202{
8203  rtx label = JUMP_LABEL (insn);
8204
8205  while (insn)
8206    {
8207      if (insn == label)
8208	break;
8209      else
8210	insn = NEXT_INSN (insn);
8211    }
8212
8213  return (insn == label);
8214}
8215
8216/* Return 1 if OP is an equality comparison, else return 0.  */
8217int
8218eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8219{
8220  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8221}
8222
8223/* Return 1 if INSN is in the delay slot of a call instruction.  */
8224int
8225jump_in_call_delay (rtx insn)
8226{
8227
8228  if (GET_CODE (insn) != JUMP_INSN)
8229    return 0;
8230
8231  if (PREV_INSN (insn)
8232      && PREV_INSN (PREV_INSN (insn))
8233      && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8234    {
8235      rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8236
8237      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8238	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8239
8240    }
8241  else
8242    return 0;
8243}
8244
8245/* Output an unconditional move and branch insn.  */
8246
8247const char *
8248output_parallel_movb (rtx *operands, int length)
8249{
8250  /* These are the cases in which we win.  */
8251  if (length == 4)
8252    return "mov%I1b,tr %1,%0,%2";
8253
8254  /* None of these cases wins, but they don't lose either.  */
8255  if (dbr_sequence_length () == 0)
8256    {
8257      /* Nothing in the delay slot, fake it by putting the combined
8258	 insn (the copy or add) in the delay slot of a bl.  */
8259      if (GET_CODE (operands[1]) == CONST_INT)
8260	return "b %2\n\tldi %1,%0";
8261      else
8262	return "b %2\n\tcopy %1,%0";
8263    }
8264  else
8265    {
8266      /* Something in the delay slot, but we've got a long branch.  */
8267      if (GET_CODE (operands[1]) == CONST_INT)
8268	return "ldi %1,%0\n\tb %2";
8269      else
8270	return "copy %1,%0\n\tb %2";
8271    }
8272}
8273
8274/* Output an unconditional add and branch insn.  */
8275
8276const char *
8277output_parallel_addb (rtx *operands, int length)
8278{
8279  /* To make life easy we want operand0 to be the shared input/output
8280     operand and operand1 to be the readonly operand.  */
8281  if (operands[0] == operands[1])
8282    operands[1] = operands[2];
8283
8284  /* These are the cases in which we win.  */
8285  if (length == 4)
8286    return "add%I1b,tr %1,%0,%3";
8287
8288  /* None of these cases win, but they don't lose either.  */
8289  if (dbr_sequence_length () == 0)
8290    {
8291      /* Nothing in the delay slot, fake it by putting the combined
8292	 insn (the copy or add) in the delay slot of a bl.  */
8293      return "b %3\n\tadd%I1 %1,%0,%0";
8294    }
8295  else
8296    {
8297      /* Something in the delay slot, but we've got a long branch.  */
8298      return "add%I1 %1,%0,%0\n\tb %3";
8299    }
8300}
8301
8302/* Return nonzero if INSN (a jump insn) immediately follows a call
8303   to a named function.  This is used to avoid filling the delay slot
8304   of the jump since it can usually be eliminated by modifying RP in
8305   the delay slot of the call.  */
8306
8307int
8308following_call (rtx insn)
8309{
8310  if (! TARGET_JUMP_IN_DELAY)
8311    return 0;
8312
8313  /* Find the previous real insn, skipping NOTEs.  */
8314  insn = PREV_INSN (insn);
8315  while (insn && GET_CODE (insn) == NOTE)
8316    insn = PREV_INSN (insn);
8317
8318  /* Check for CALL_INSNs and millicode calls.  */
8319  if (insn
8320      && ((GET_CODE (insn) == CALL_INSN
8321	   && get_attr_type (insn) != TYPE_DYNCALL)
8322	  || (GET_CODE (insn) == INSN
8323	      && GET_CODE (PATTERN (insn)) != SEQUENCE
8324	      && GET_CODE (PATTERN (insn)) != USE
8325	      && GET_CODE (PATTERN (insn)) != CLOBBER
8326	      && get_attr_type (insn) == TYPE_MILLI)))
8327    return 1;
8328
8329  return 0;
8330}
8331
8332/* We use this hook to perform a PA specific optimization which is difficult
8333   to do in earlier passes.
8334
8335   We want the delay slots of branches within jump tables to be filled.
8336   None of the compiler passes at the moment even has the notion that a
8337   PA jump table doesn't contain addresses, but instead contains actual
8338   instructions!
8339
8340   Because we actually jump into the table, the addresses of each entry
8341   must stay constant in relation to the beginning of the table (which
8342   itself must stay constant relative to the instruction to jump into
8343   it).  I don't believe we can guarantee earlier passes of the compiler
8344   will adhere to those rules.
8345
8346   So, late in the compilation process we find all the jump tables, and
8347   expand them into real code -- e.g. each entry in the jump table vector
8348   will get an appropriate label followed by a jump to the final target.
8349
8350   Reorg and the final jump pass can then optimize these branches and
8351   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot).  We
   identify jumps in switch tables by using insns with the attribute
   type TYPE_BTABLE_BRANCH.

   We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
   insns.  This serves two purposes.  First, it prevents jump.c from
   noticing that the last N entries in the table jump to the instruction
   immediately after the table and deleting the jumps.  Second, those
   insns mark where we should emit .begin_brtab and .end_brtab directives
   when using GAS (allows for better link time optimizations).  */
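
/* Schematically, a two entry table such as

	.word L$0003
	.word L$0004

   becomes

   L$0100:	b L$0003
		nop
   L$0101:	b L$0004
		nop

   where each nop can later be replaced by a useful insn if the delay
   slot is filled.  */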

static void
pa_reorg (void)
{
  rtx insn;

  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, tmp, location, label;
	  unsigned int length, i;

	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Emit marker for the beginning of the branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);

	  pattern = PATTERN (insn);
	  location = PREV_INSN (insn);
	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

	  for (i = 0; i < length; i++)
	    {
	      /* Emit a label before each jump to keep jump.c from
		 removing this code.  */
	      tmp = gen_label_rtx ();
	      LABEL_NUSES (tmp) = 1;
	      emit_label_after (tmp, location);
	      location = NEXT_INSN (location);

	      if (GET_CODE (pattern) == ADDR_VEC)
		label = XEXP (XVECEXP (pattern, 0, i), 0);
	      else
		label = XEXP (XVECEXP (pattern, 1, i), 0);

	      tmp = gen_short_jump (label);

	      /* Emit the jump itself.  */
	      tmp = emit_jump_insn_after (tmp, location);
	      JUMP_LABEL (tmp) = label;
	      LABEL_NUSES (label)++;
	      location = NEXT_INSN (location);

	      /* Emit a BARRIER after the jump.  */
	      emit_barrier_after (location);
	      location = NEXT_INSN (location);
	    }

	  /* Emit marker for the end of the branch table.  */
	  emit_insn_before (gen_end_brtab (), location);
	  location = NEXT_INSN (location);
	  emit_barrier_after (location);

	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
	  delete_insn (insn);
	}
    }
  else
    {
      /* Still need brtab marker insns.  FIXME: the presence of these
	 markers disables output of the branch table to readonly memory,
	 and any alignment directives that might be needed.  Possibly,
	 the begin_brtab insn should be output before the label for the
	 table.  This doesn't matter at the moment since the tables are
	 always output in the text section.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  /* Find an ADDR_VEC insn.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Now generate markers for the beginning and end of the
	     branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);
	  emit_insn_after (gen_end_brtab (), insn);
	}
    }
}

/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

      * addb can add two registers or a register and a small integer
      and jump to a nearby (+-8k) location.  Normally the jump to the
      nearby location is conditional on the result of the add, but by
      using the "true" condition we can make the jump unconditional.
      Thus addb can perform two independent operations in one insn.

      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.

      * fmpyadd and fmpysub can perform an FP multiply and either an
      FP add or FP sub if the operands of the multiply and add/sub are
      independent (there are other minor restrictions).  Note both
      the fmpy and fadd/fsub can in theory move to better spots according
      to data dependencies, but for now we require the fmpy stay at a
      fixed location.

      * Many of the memory operations can perform pre & post updates
      of index registers.  GCC's pre/post increment/decrement addressing
      is far too simple to take advantage of all the possibilities.  This
      pass may not be suitable since those insns may not be independent.

      * comclr can compare two registers or a register and a small
      integer, nullify the following instruction and zero some other
      register.  This is more difficult to use as it's harder to find
      an insn which will generate a comclr than finding something like
      an unconditional branch.  (conditional moves & long branches
      create comclr insns).

      * Most arithmetic operations can conditionally skip the next
      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is one insn away).  These are difficult to use due to the
      branch length restrictions.  */
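
/* For example (operand constraints elided), an FP multiply and an
   independent FP add such as

	fr22 = fr4 * fr5
	fr23 = fr6 + fr7

   can be rewritten as a single fmpyadd insn computing both results
   at once.  */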

static void
pa_combine_instructions (void)
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
	  && GET_CODE (anchor) != JUMP_INSN
	  && GET_CODE (anchor) != CALL_INSN)
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER
	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx floater;

	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      if (GET_CODE (floater) == NOTE
		  || (GET_CODE (floater) == INSN
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (GET_CODE (floater) != INSN
		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		{
		  floater = NULL_RTX;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (GET_CODE (floater) == NOTE
		      || (GET_CODE (floater) == INSN
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (GET_CODE (floater) != INSN
		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		    {
		      floater = NULL_RTX;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  */
		      if (pa_can_combine_p (new, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)),
						  0),
					    XEXP (SET_SRC (PATTERN (floater)),
						  1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      emit_insn_before (gen_rtx_PARALLEL
				(VOIDmode,
				 gen_rtvec (2, PATTERN (anchor),
					    PATTERN (floater))),
				anchor);

	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      rtx temp;
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      temp
		= emit_jump_insn_before (gen_rtx_PARALLEL
					 (VOIDmode,
					  gen_rtvec (2, PATTERN (anchor),
						     PATTERN (floater))),
					 anchor);

	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}

static int
pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
		  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || (extract_insn (new), ! constrain_operands (1)))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider: one output and
     two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
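
/* For example, with 32-bit register usage, reorg may rewrite

	ldi 7,%r26
	bl $$divI,%r31
	nop

   as

	bl $$divI,%r31
	ldi 7,%r26		; argument setup in the delay slot  */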
int
insn_refs_are_delayed (rtx insn)
{
  return ((GET_CODE (insn) == INSN
	   && GET_CODE (PATTERN (insn)) != SEQUENCE
	   && GET_CODE (PATTERN (insn)) != USE
	   && GET_CODE (PATTERN (insn)) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI));
}

/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else
   TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */
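
/* For example, on PA64 a 12-byte struct comes back as (roughly)

	(parallel:BLK [(expr_list (reg:DI 28) (const_int 0))
		       (expr_list (reg:DI 29) (const_int 8))])

   so the first eight bytes land in GR 28 and the remainder in GR 29.  */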

rtx
function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      if (TARGET_64BIT)
	{
	  /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && TYPE_PRECISION (valtype) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}

/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
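/* For example, with the 32-bit ABI the four argument words map to GCC
   register numbers 26, 25, 24 and 23 (%r26 down to %r23), so a one-word
   argument following one word of arguments lands in
   gpr_reg_base = 26 - 1 = 25.  */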
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
	      int named ATTRIBUTE_UNUSED)
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* A structure with a single SFmode field belongs in a GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}


/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
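
/* For example, on PA64 with six argument words already used, a four
   word argument has no alignment pad and needs words 6 through 9;
   only words 6 and 7 are in registers, so the result is
   (8 - 6 - 0) * UNITS_PER_WORD = 16 bytes passed in registers.  */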


/* Return a string to output before text in the current function.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */
const char *
som_text_section_asm_op (void)
{
  if (!TARGET_SOM)
    return "";

  if (TARGET_GAS)
    {
      if (cfun && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    return
 "\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=24,COMDAT";
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  We need to
	     forget that we are in the text section so that the function
	     text_section in varasm.c will call us the next time around.  */
	  forget_section ();
	}
      return "\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$";
    }

  return "\t.SPACE $TEXT$\n\t.SUBSPA $CODE$";
}
/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static void
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	som_one_only_readonly_data_section ();
      else
	readonly_data_section ();
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    readonly_data_section ();
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    som_one_only_data_section ();
  else
    data_section ();
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
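
/* For example, a 12-byte struct is returned in memory with the SOM
   ABI (12 > 8) but in registers on PA64 (12 <= 16).  */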

/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct extern_symbol GTY(())
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol structures.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol *p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}
#endif

#include "gt-pa.h"
