/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

const struct attribute_spec arm_attribute_table[];

/* Forward function declarations.  */
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
inline static int thumb_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static int arm_size_return_regs (void);
#ifndef AOF_ASSEMBLER
static bool arm_assemble_integer (rtx, unsigned int, int);
#endif
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static int current_file_function_operand (rtx);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (tree, tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static int arm_address_cost (rtx);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int);
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

#ifdef AOF_ASSEMBLER
static void aof_globalize_label (FILE *, const char *);
static void aof_dump_imports (FILE *);
static void aof_dump_pic_table (FILE *);
static void aof_file_start (void);
static void aof_file_end (void);
static void aof_asm_init_sections (void);
#endif
static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, tree, bool);
static bool arm_promote_prototypes (tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (tree);
static bool arm_must_pass_in_stack (enum machine_mode, tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_vector_mode_supported_p (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);


/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#ifdef AOF_ASSEMBLER
#undef  TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tDCB\t"
#undef  TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aof_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END aof_file_end
#else
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer
#endif

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

/* This will be overridden in arm_override_options.  */
#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_tls_referenced_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
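
/* As an illustrative sketch of what this controls (not part of the
   original commentary): final_prescan_insn can replace a short forward
   branch by predicating the instructions it would have skipped, e.g.

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	str	r1, [r2]
   .L1:

   becomes

	cmp	r0, #0
	addne	r1, r1, #1
	strne	r1, [r2]

   max_insns_skipped bounds how many instructions may be conditionalised
   in this way.  */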

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* Define the information needed to generate branch insns.  This is
   stored from the compare operation.  */
rtx arm_compare_op0, arm_compare_op1;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by the command line.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int    arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)	      /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)	      /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)	      /* XScale */
#define FL_CIRRUS     (1 << 11)	      /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF	      (1 << 14)	      /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */

#define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2	0
#define FL_FOR_ARCH3	FL_MODE32
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
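
/* Illustrative expansion (not spelled out above): the FL_FOR_ARCHn
   macros nest, so each architecture inherits the abilities of its
   predecessors.  For example

     FL_FOR_ARCH5TE = FL_FOR_ARCH5E | FL_THUMB
		    = FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5
		      | FL_ARCH5E | FL_THUMB

   which is why a single test such as (insn_flags & FL_ARCH4) holds for
   every v4-or-later selection.  */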

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 when a return insn is output, this means that the epilogue
   is not needed.  */
int return_used_this_function;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
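
/* Note (an editorial observation, not in the original commentary): the
   table is ordered so that each condition's inverse sits at the
   adjacent index, i.e. the inverse of code N is code N ^ 1 ("eq"/"ne",
   "cs"/"cc", "hi"/"ls", and so on), which is the property that
   ARM_INVERSE_CONDITION_CODE in arm.h relies on.  */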

#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, int, int, int *);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC |             FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC |             FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC |             FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC |             FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC |             FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC |             FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC |             FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,	  "6K",  FL_CO_PROC |             FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC |             FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC |             FL_FOR_ARCH6ZK, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
  {NULL, arm_none, NULL, 0 , NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string	  name            processors  */
  { NULL,	"-mcpu=",	all_cores  },
  { NULL,	"-march=",	all_architectures },
  { NULL,	"-mtune=",	all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};


/* Available values for -mfpu=.  */

static const struct fpu_desc all_fpus[] =
{
  {"fpa",	FPUTYPE_FPA},
  {"fpe2",	FPUTYPE_FPA_EMU2},
  {"fpe3",	FPUTYPE_FPA_EMU3},
  {"maverick",	FPUTYPE_MAVERICK},
  {"vfp",	FPUTYPE_VFP}
};


/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum fputype fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,		/* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,	/* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP		/* FPUTYPE_VFP  */
};


struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};


/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",	ARM_FLOAT_ABI_SOFT},
  {"softfp",	ARM_FLOAT_ABI_SOFTFP},
  {"hard",	ARM_FLOAT_ABI_HARD}
};


struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};


/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",   ARM_ABI_ATPCS},
  {"aapcs",   ARM_ABI_AAPCS},
  {"iwmmxt",  ARM_ABI_IWMMXT},
  {"aapcs-linux",   ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
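
/* For reference: these correspond to the standard ELF TLS access
   models -- TLS_GD32 is general dynamic, TLS_LDM32 and TLS_LDO32 are
   the module and offset halves of local dynamic, TLS_IE32 is initial
   exec, and TLS_LE32 is local exec.  */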

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
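
/* Usage sketch (illustrative example, not in the original commentary):
   the loop above is Kernighan's trick; each iteration of
   value &= value - 1 clears exactly one set bit, so for example
   bit_count (0x2c) iterates 0x2c -> 0x28 -> 0x20 -> 0 and returns 3.  */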

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     caller-saved (call-clobbered) registers, there is no need to tell
     the compiler explicitly that those registers are clobbered by
     these routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
}
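
/* Worked example (illustrative, assuming a BPABI soft-float compile):
   with the libfuncs above in place,

     long long q = a / b;

   becomes a call to __aeabi_ldivmod with A in {r0, r1} and B in
   {r2, r3}; the quotient comes back in {r0, r1} and the unused
   remainder in {r2, r3}.  SImode division instead uses the cheaper
   __aeabi_idiv entry point.  */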

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
	  const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i != ARM_OPT_SET_TUNE)
		  {
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;
		  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int        sought;
      enum processor_type cpu;

      cpu = TARGET_CPU_DEFAULT;
      if (cpu == arm_none)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  cpu = SUBTARGET_CPU_DEFAULT;
#endif
	  /* Default to ARM6.  */
	  if (cpu == arm_none)
	    cpu = arm6;
	}
      sel = &all_cores[cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  insn_flags = sel->flags;
	}
      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
	arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  if (optimize_size)
    targetm.rtx_costs = arm_size_rtx_costs;
  else
    targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
      target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    arm_pic_register = TARGET_APCS_STACK ? 9 : 10;

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }
  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
	{
	  if (streq (all_fpus[i].name, target_fpu_name))
	    {
	      arm_fpu_arch = all_fpus[i].fpu;
	      arm_fpu_tune = arm_fpu_arch;
	      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
	      break;
	    }
	}
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
	error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
	arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
	arm_fpu_arch = FPUTYPE_MAVERICK;
      else
	arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
	arm_fpu_tune = FPUTYPE_FPA;
      else
	arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
	target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
	target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
	target_thread_pointer = TP_CP15;
      else
	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB)
    error ("can not use -mtp=cp15 with -mthumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }
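
  /* Worked example of the boundary's effect (illustrative): with the
     historic default of 32, struct { char c; } is padded out to four
     bytes, while with -mstructure-size-boundary=8 it occupies a single
     byte.  AAPCS targets are pinned to 8 above, so the option only
     varies layout for the older ABIs.  */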

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM)
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
         2 cycles to load a constant, and the load scheduler may well
	 reduce that to 1.  */
      if (arm_ld_sched)
        arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
         to achieve a good schedule, so it's faster to synthesize
	 constants that can be done in two insns.  */
      if (arm_tune_xscale)
        arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
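
/* Usage example: these strings arrive via a function attribute such as

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   which arm_isr_value below maps to ARM_FT_ISR.  */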

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked functions and volatile functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
    return 0;

  /* So do interrupt functions that use the frame pointer.  */
  if (IS_INTERRUPT (func_type) && frame_pointer_needed)
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (current_function_pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || current_function_calls_eh_return
      /* Or if the function calls alloca */
      || current_function_calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
    return 0;

  saved_int_regs = arm_compute_save_reg_mask ();

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (GET_CODE (sibling) == CALL_INSN);

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0)
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (regs_ever_live[regno] && !call_used_regs[regno])
	return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (regs_ever_live[regno] && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (regs_ever_live[regno] && ! call_used_regs [regno])
	return 0;

  return 1;
}
1598
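/* Editorial illustration (not in the original sources): when
   use_return_insn succeeds, the epilogue can restore the saved registers
   and return in one load-multiple instruction, e.g.

       ldmfd   sp!, {r4, r5, lr}    @ two-insn epilogue: restore,
       bx      lr                   @ then return

   collapses to

       ldmfd   sp!, {r4, r5, pc}    @ one instruction does both.

   Mnemonics are a sketch only; the real output depends on the ABI,
   architecture level and interworking settings checked above.  */
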
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros, rounded down to the nearest even
     number.  */
  lowbit = (ffs ((int) i) - 1) & ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;
  else if (lowbit <= 4
	   && ((i & ~0xc000003f) == 0
	       || (i & ~0xf000000f) == 0
	       || (i & ~0xfc000003) == 0))
    return TRUE;

  return FALSE;
}

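/* Editorial illustration (not part of the original sources): an ARM
   data-processing immediate is an 8-bit value rotated right by an even
   amount.  The function below is a hedged, self-contained restatement of
   the test const_ok_for_arm implements for 32-bit values; the name is
   hypothetical and the code is compiled out.  */
#if 0
static int
arm_immediate_by_rotation (unsigned int x)
{
  int rot;

  /* Try every even left-rotation; if some rotation leaves only the low
     eight bits set, X is encodable.  */
  for (rot = 0; rot < 32; rot += 2)
    {
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
/* Thus 0x000000ff, 0xff000000 and 0xf000000f are encodable, while
   0x00000fff and 0x00010001 are not.  */
#endif
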
/* Return true if I is a valid constant for the operation CODE.  */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case PLUS:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
    case IOR:
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}

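/* Editorial illustration: const_ok_for_op lets the caller flip the
   operation instead of synthesizing an awkward constant.  For example,
   (plus r0 -255) is not directly encodable, but 255 is, so the compiler
   can emit "sub r0, r0, #255"; similarly (and r0 0xffffff00) can become
   "bic r0, r0, #255".  Mnemonics are illustrative only.  */
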
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or if we will want to CSE the values.
   Return value is the number of insns emitted.  */

int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory, so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > arm_constant_limit + (code != SET)))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      emit_set_insn (target, GEN_INT (val));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      emit_set_insn (temp, GEN_INT (val));
	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}

static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
  HOST_WIDE_INT temp1;
  int num_insns = 0;
  do
    {
      int end;

      if (i <= 0)
	i += 32;
      if (remainder & (3 << (i - 2)))
	{
	  end = i - 8;
	  if (end < 0)
	    end += 32;
	  temp1 = remainder & ((0x0ff << end)
			       | ((i < end) ? (0xff >> (32 - end)) : 0));
	  remainder &= ~temp1;
	  num_insns++;
	  i -= 6;
	}
      i -= 2;
    } while (remainder);
  return num_insns;
}

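/* Editorial illustration: loading 0x12345678 takes four instructions,
   one per rotated 8-bit chunk of the constant:

       mov     rD, #0x12000000
       orr     rD, rD, #0x344000
       orr     rD, rD, #0x1640
       orr     rD, rD, #0x38

   Each chunk is a valid immediate and the four sum to 0x12345678.  The
   exact chunking depends on where the scan starts; this is a sketch of
   the idea, not guaranteed compiler output.  */
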
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}

/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int can_negate_initial = 0;
  int can_shift = 0;
  int i;
  int num_bits_set = 0;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      can_shift = 1;
      can_negate = 1;
      break;

    case PLUS:
      can_negate = 1;
      can_negate_initial = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}

      /* We don't know how to handle other cases yet.  */
      gcc_assert (remainder == 0xffffffff);

      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (VOIDmode, target,
					 gen_rtx_NOT (mode, source)));
      return 1;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}
      can_negate = 1;

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn, get out quickly.  */
  if (const_ok_for_arm (val)
      || (can_negate_initial && const_ok_for_arm (-val))
      || (can_invert && const_ok_for_arm (~val)))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (VOIDmode, target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits;
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (VOIDmode, target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (VOIDmode, target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, sub,
						   GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      if (set_sign_bit_copies > 8
	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do two or more insns' worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  for (i = 0; i < 32; i++)
    if (remainder & (1 << i))
      num_bits_set++;

  if (code == AND || (can_invert && num_bits_set > 16))
    remainder = (~remainder) & 0xffffffff;
  else if (code == PLUS && num_bits_set > 16)
    remainder = (-remainder) & 0xffffffff;
  else
    {
      can_invert = 0;
      can_negate = 0;
    }

  /* Now try to find a way of doing the job in either two or three
     instructions.
     We start by looking for the largest block of zeros that is aligned on
     a 2-bit boundary; we then fill up the temps, wrapping around to the
     top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.  */
  {
    int best_start = 0;
    int best_consecutive_zeros = 0;

    for (i = 0; i < 32; i += 2)
      {
	int consecutive_zeros = 0;

	if (!(remainder & (3 << i)))
	  {
	    while ((i < 32) && !(remainder & (3 << i)))
	      {
		consecutive_zeros += 2;
		i += 2;
	      }
	    if (consecutive_zeros > best_consecutive_zeros)
	      {
		best_consecutive_zeros = consecutive_zeros;
		best_start = i - consecutive_zeros;
	      }
	    i -= 2;
	  }
      }

    /* So long as it won't require any more insns to do so, it's
       desirable to emit a small constant (in bits 0...9) in the last
       insn.  This way there is more chance that it can be combined with
       a later addressing insn to form a pre-indexed load or store
       operation.  Consider:

	       *((volatile int *)0xe0000100) = 1;
	       *((volatile int *)0xe0000110) = 2;

       We want this to wind up as:

		mov rA, #0xe0000000
		mov rB, #1
		str rB, [rA, #0x100]
		mov rB, #2
		str rB, [rA, #0x110]

       rather than having to synthesize both large constants from scratch.

       Therefore, we calculate how many insns would be required to emit
       the constant starting from `best_start', and also starting from
       zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
       yield a shorter sequence, we may as well use zero.  */
    if (best_start != 0
	&& ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
	&& (count_insns_for_constant (remainder, 0) <=
	    count_insns_for_constant (remainder, best_start)))
      best_start = 0;

    /* Now start emitting the insns.  */
    i = best_start;
    do
      {
	int end;

	if (i <= 0)
	  i += 32;
	if (remainder & (3 << (i - 2)))
	  {
	    end = i - 8;
	    if (end < 0)
	      end += 32;
	    temp1 = remainder & ((0x0ff << end)
				 | ((i < end) ? (0xff >> (32 - end)) : 0));
	    remainder &= ~temp1;

	    if (generate)
	      {
		rtx new_src, temp1_rtx;

		if (code == SET || code == MINUS)
		  {
		    new_src = (subtargets ? gen_reg_rtx (mode) : target);
		    if (can_invert && code != MINUS)
		      temp1 = ~temp1;
		  }
		else
		  {
		    if (remainder && subtargets)
		      new_src = gen_reg_rtx (mode);
		    else
		      new_src = target;
		    if (can_invert)
		      temp1 = ~temp1;
		    else if (can_negate)
		      temp1 = -temp1;
		  }

		temp1 = trunc_int_for_mode (temp1, mode);
		temp1_rtx = GEN_INT (temp1);

		if (code == SET)
		  ;
		else if (code == MINUS)
		  temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
		else
		  temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

		emit_constant_insn (cond,
				    gen_rtx_SET (VOIDmode, new_src,
						 temp1_rtx));
		source = new_src;
	      }

	    if (code == SET)
	      {
		can_invert = 0;
		code = PLUS;
	      }
	    else if (code == MINUS)
	      code = PLUS;

	    insns++;
	    i -= 6;
	  }
	i -= 2;
      }
    while (remainder);
  }

  return insns;
}

/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
			     rtx *op1)
{
  unsigned HOST_WIDE_INT i = INTVAL (*op1);
  unsigned HOST_WIDE_INT maxval;
  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  switch (code)
    {
    case EQ:
    case NE:
      return code;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  return code == GT ? GE : LT;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  return code == GE ? GT : LE;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  return code == GTU ? GEU : LTU;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  return code == GEU ? GTU : LEU;
	}
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}

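/* Editorial illustration: 0xfff is not a rotated 8-bit immediate, so
   (GT x 0xfff) cannot be tested directly; the equivalent (GE x 0x1000)
   can, since 0x1000 is encodable:

       cmp     r0, #4096       @ r0 > 4095  ==>  r0 >= 4096

   Sketch of the transformation only, not literal compiler output.  */
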

/* Define how to find the value returned by a function.  */

rtx
arm_function_value (tree type, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);
  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    PROMOTE_FUNCTION_MODE (mode, unsignedp, type);

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return LIBCALL_VALUE (mode);
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_ARM)
    {
      if (TARGET_HARD_FLOAT_ABI)
	{
	  if (TARGET_FPA)
	    size += 12;
	  if (TARGET_MAVERICK)
	    size += 8;
	}
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}

/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called by the macro
   RETURN_IN_MEMORY.  */
int
arm_return_in_memory (tree type)
{
  HOST_WIDE_INT size;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != VECTOR_TYPE
      && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
    /* All simple types are returned in registers.
       For AAPCS, complex types are treated the same as aggregates.  */
    return 0;

  size = int_size_in_bytes (type);

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* To maximize backwards compatibility with previous versions of GCC,
     return vectors up to 4 words in registers.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return 1;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non-bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non-FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = TREE_CHAIN (field))
	continue;

      if (field == NULL)
	return 0; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return 1;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (RETURN_IN_MEMORY (TREE_TYPE (field)))
	return 1;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = TREE_CHAIN (field);
	   field;
	   field = TREE_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return 1;
	}

      return 0;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = TREE_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return 1;

	  if (RETURN_IN_MEMORY (TREE_TYPE (field)))
	    return 1;
	}

      return 0;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return 1;
}

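/* Editorial illustration of the APCS 'integer like' rule above
   (hypothetical layouts, for illustration only):

       struct s1 { int x; };                  -- returned in r0
       struct s2 { int x : 8; int y : 24; };  -- bitfields only: r0
       struct s3 { float x; };                -- float member: memory
       struct s4 { int x; int y; };           -- wider than a word: memory

   The governing tests are the ones coded above.  */
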
/* Indicate whether or not words of a double are in big-endian order.  */

int
arm_float_words_big_endian (void)
{
  if (TARGET_MAVERICK)
    return 0;

  /* For FPA, float words are always big-endian.  For VFP, float words
     follow the memory system mode.  */

  if (TARGET_FPA)
    return 1;

  if (TARGET_VFP)
    return (TARGET_BIG_END ? 1 : 0);

  return 1;
}

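/* Editorial note: this is the source of the classic ARM 'mixed-endian'
   doubles.  On a little-endian FPA target the two words of a double are
   stored most-significant word first while the bytes within each word
   stay little-endian, so 1.0 (0x3ff0000000000000) is laid out in memory
   as 00 00 f0 3f 00 00 00 00.  VFP instead follows the memory system's
   byte order throughout.  */
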
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname ATTRIBUTE_UNUSED,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  pcum->call_cookie = CALL_NORMAL;

  if (TARGET_LONG_CALLS)
    pcum->call_cookie = CALL_LONG;

  /* Check for long call/short call attributes.  The attributes
     override any command line option.  */
  if (fntype)
    {
      if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
	pcum->call_cookie = CALL_SHORT;
      else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
	pcum->call_cookie = CALL_LONG;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way ARM handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (!pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}


/* Return true if mode/type need doubleword alignment.  */
bool
arm_needs_doubleword_align (enum machine_mode mode, tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		  tree type, int named)
{
  int nregs;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way ARM handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword-aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  if (mode == VOIDmode)
    /* Compute operand 2 of the call insn.  */
    return GEN_INT (pcum->call_cookie);

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}

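/* Editorial illustration of the even-register rule above: for a call
   f (int a, long long b), A is passed in r0, but B needs doubleword
   alignment, so r1 is skipped and B occupies the pair r2/r3; a further
   word-sized argument would then go on the stack.  Sketch only; the
   actual placement follows ARM_DOUBLEWORD_ALIGN and the active ABI.  */
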
static int
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		       tree type, bool named ATTRIBUTE_UNUSED)
{
  int nregs = pcum->nregs;

  if (arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}

/* Table of machine attributes.  */
const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26-bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26-bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};

/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qs attribute only applies to functions",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qs attribute ignored",
		       IDENTIFIER_POINTER (name));
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qs attribute ignored",
		       IDENTIFIER_POINTER (name));
	    }
	}
    }

  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (tree type1, tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same
	 attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}

/*  Encode long_call or short_call attribute by prefixing
    symbol name in DECL with a special character FLAG.  */
void
arm_encode_call_attribute (tree decl, int flag)
{
  const char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  int len = strlen (str);
  char *newstr;

  /* Do not allow weak functions to be treated as short call.  */
  if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
    return;

  newstr = alloca (len + 2);
  newstr[0] = flag;
  strcpy (newstr + 1, str);

  newstr = (char *) ggc_alloc_string (newstr, len + 1);
  XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
}

/*  Assigns default attributes to newly defined type.  This is used to
    set short_call/long_call attributes for function types of
    functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions when inside
     #pragma long_calls, or __attribute__ ((short_call)) when inside
     #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return 1 if the operand is a SYMBOL_REF for a function known to be
   defined within the current compilation unit.  If this cannot be
   determined, then 0 is returned.  */
static int
current_file_function_operand (rtx sym_ref)
{
  /* This is a bit of a fib.  A function will have a short call flag
     applied to its name if it has the short call attribute, or if it has
     already been defined within the current compilation unit.  */
  if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
    return 1;

  /* The current function is always defined within the current compilation
     unit.  If it is a weak definition, however, then this may not be the
     real definition of the function, and so we have to say no.  */
  if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
      && !DECL_WEAK (current_function_decl))
    return 1;

  /* We cannot make the determination - default to returning 0.  */
  return 0;
}

/* Return nonzero if a 32-bit "long_call" should be generated for
   this call.  We generate a long_call if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified
            and either:
                1. -ffunction-sections is in effect
	     or 2. the current function has __attribute__ ((section))
	     or 3. the target function has __attribute__ ((section))

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined within the current compilation unit.

   This function will be called by C fragments contained in the machine
   description file.  SYM_REF and CALL_COOKIE correspond to the matched
   rtl operands.  CALL_SYMBOL is used to distinguish between
   two different callers of the function.  It is set to 1 in the
   "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
   and "call_value" patterns.  This is because of the difference in the
   SYM_REFs passed by these patterns.  */
int
arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
{
  if (!call_symbol)
    {
      if (GET_CODE (sym_ref) != MEM)
	return 0;

      sym_ref = XEXP (sym_ref, 0);
    }

  if (GET_CODE (sym_ref) != SYMBOL_REF)
    return 0;

  if (call_cookie & CALL_SHORT)
    return 0;

  if (TARGET_LONG_CALLS)
    {
      if (flag_function_sections
	  || DECL_SECTION_NAME (current_function_decl))
	/* c.3 is handled by the definition of the
	   ARM_DECLARE_FUNCTION_SIZE macro.  */
	return 1;
    }

  if (current_file_function_operand (sym_ref))
    return 0;

  return (call_cookie & CALL_LONG)
    || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
    || TARGET_LONG_CALLS;
}

/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something for which we have no decl, or if we
     are in Thumb mode.  */
  if (decl == NULL || TARGET_THUMB)
    return false;

  /* Get the calling method.  */
  if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
    call_type = CALL_SHORT;
  else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
    call_type = CALL_LONG;

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  However, if not compiling PIC, we know
     we can reach the symbol if it is in this compilation unit.  */
  if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
    return false;

  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (arm_current_func_type ()))
    return false;

  /* Everything else is ok.  */
  return true;
}


/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
#ifndef AOF_ASSEMBLER
      rtx pic_ref, address;
#endif
      rtx insn;
      int subregs = 0;

      /* If this function doesn't have a pic register, create one now.
	 A lot of the logic here is made obscure by the fact that this
	 routine gets called as part of the rtx cost estimation
	 process.  We don't want those calls to affect any assumptions
	 about the real function; and further, we can't call
	 entry_of_function() until we start the real expansion
	 process.  */
      if (!current_function_uses_pic_offset_table)
	{
	  gcc_assert (!no_new_pseudos);
	  if (arm_pic_register != INVALID_REGNUM)
	    {
	      if (!cfun->machine->pic_reg)
		cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	      /* Play games to avoid marking the function as needing pic
		 if we are being called as part of the cost-estimation
		 process.  */
	      if (!ir_type ())
		current_function_uses_pic_offset_table = 1;
	    }
	  else
	    {
	      rtx seq;

	      if (!cfun->machine->pic_reg)
		cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	      /* Play games to avoid marking the function as needing pic
		 if we are being called as part of the cost-estimation
		 process.  */
	      if (!ir_type ())
		{
		  current_function_uses_pic_offset_table = 1;
		  start_sequence ();

		  arm_load_pic_register (0UL);

		  seq = get_insns ();
		  end_sequence ();
		  emit_insn_after (seq, entry_of_function ());
		}
	    }
	}

      if (reg == 0)
	{
	  gcc_assert (!no_new_pseudos);
	  reg = gen_reg_rtx (Pmode);

	  subregs = 1;
	}

#ifdef AOF_ASSEMBLER
      /* The AOF assembler can generate relocations for these directly, and
	 understands that the PIC register has to be added into the offset.  */
      insn = emit_insn (gen_pic_load_addr_based (reg, orig));
#else
      if (subregs)
	address = gen_reg_rtx (Pmode);
      else
	address = reg;

      if (TARGET_ARM)
	emit_insn (gen_pic_load_addr_arm (address, orig));
      else
	emit_insn (gen_pic_load_addr_thumb (address, orig));

      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)))
	  && NEED_GOT_RELOC)
	pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
      else
	{
	  pic_ref = gen_const_mem (Pmode,
				   gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
						 address));
	}

      insn = emit_move_insn (reg, pic_ref);
#endif
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by the loop pass.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
					    REG_NOTES (insn));
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (!no_new_pseudos);
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (!no_new_pseudos);
	      offset = force_reg (Pmode, offset);
	    }

	  if (GET_CODE (offset) == CONST_INT)
	    return plus_constant (base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}

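/* Editorial illustration: for a global symbol the expansion above
   typically produces something like

       ldr     r3, .LCP        @ offset of x's GOT slot
       ldr     r0, [sl, r3]    @ sl = PIC base; load &x from the GOT
     .LCP:
       .word   x(GOT)

   while a local symbol or label skips the GOT load and just adds the
   PIC base to the offset.  Register choices and relocation spellings
   are illustrative only.  */
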
3358
3359/* Find a spare low register to use during the prolog of a function.  */
3360
3361static int
3362thumb_find_work_register (unsigned long pushed_regs_mask)
3363{
3364  int reg;
3365
3366  /* Check the argument registers first as these are call-used.  The
3367     register allocation order means that sometimes r3 might be used
3368     but earlier argument registers might not, so check them all.  */
3369  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3370    if (!regs_ever_live[reg])
3371      return reg;
3372
  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have fewer than 4
     registers' worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): i.e. current_function_pretend_args_size is greater than 0.  */
3381  if (cfun->machine->uses_anonymous_args
3382      && current_function_pretend_args_size > 0)
3383    return LAST_ARG_REGNUM;
3384
  /* The other case is when we have fixed arguments but fewer than 4
     registers' worth.  In this case r3 might be used in the body of the
     function, but it is not being used to convey an argument into the
     function.  In theory we could just check current_function_args_size to
     see how many bytes are being passed in argument registers, but it seems
     to be unreliable.  Sometimes it will have the value 0 when in fact
     arguments are being passed.  (See testcase execute/20021111-1.c for an
     example.)  So we also check the args_info.nregs field.  The problem with
     this field is that it makes no allowance for arguments that are passed
     to the function but which are not used.  Hence we could miss an
     opportunity when a function has an unused argument in r3.  But it is
     better to be safe than sorry.  */
3397  if (! cfun->machine->uses_anonymous_args
3398      && current_function_args_size >= 0
3399      && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3400      && cfun->args_info.nregs < 4)
3401    return LAST_ARG_REGNUM;
3402
3403  /* Otherwise look for a call-saved register that is going to be pushed.  */
3404  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3405    if (pushed_regs_mask & (1 << reg))
3406      return reg;
3407
3408  /* Something went wrong - thumb_compute_save_reg_mask()
3409     should have arranged for a suitable register to be pushed.  */
3410  gcc_unreachable ();
3411}
3412
3413static GTY(()) int pic_labelno;
3414
/* Generate code to load the PIC register.  In Thumb mode SAVED_REGS is the
   mask of registers pushed in the prologue, from which a spare low register
   can be chosen as scratch.  */
3417
3418void
3419arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3420{
3421#ifndef AOF_ASSEMBLER
3422  rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx;
3423  rtx global_offset_table;
3424
3425  if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3426    return;
3427
3428  gcc_assert (flag_pic);
3429
3430  /* We use an UNSPEC rather than a LABEL_REF because this label never appears
3431     in the code stream.  */
3432
3433  labelno = GEN_INT (pic_labelno++);
3434  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3435  l1 = gen_rtx_CONST (VOIDmode, l1);
3436
3437  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition; on the Thumb it is 'dot + 4'.  */
3440  pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3441  if (GOT_PCREL)
    pic_tmp2 = gen_rtx_CONST (VOIDmode,
			      gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3444  else
3445    pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3446
3447  pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3448
3449  if (TARGET_ARM)
3450    {
3451      emit_insn (gen_pic_load_addr_arm (cfun->machine->pic_reg, pic_rtx));
3452      emit_insn (gen_pic_add_dot_plus_eight (cfun->machine->pic_reg,
3453					     cfun->machine->pic_reg, labelno));
3454    }
3455  else
3456    {
3457      if (arm_pic_register != INVALID_REGNUM
3458	  && REGNO (cfun->machine->pic_reg) > LAST_LO_REGNUM)
3459	{
3460	  /* We will have pushed the pic register, so we should always be
3461	     able to find a work register.  */
3462	  pic_tmp = gen_rtx_REG (SImode,
3463				 thumb_find_work_register (saved_regs));
3464	  emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3465	  emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3466	}
3467      else
3468	emit_insn (gen_pic_load_addr_thumb (cfun->machine->pic_reg, pic_rtx));
3469      emit_insn (gen_pic_add_dot_plus_four (cfun->machine->pic_reg,
3470					    cfun->machine->pic_reg, labelno));
3471    }
3472
  /* Need to emit this whether or not we obey regdecls, since
     setjmp/longjmp can invalidate the computed liveness information.  */
3475  emit_insn (gen_rtx_USE (VOIDmode, cfun->machine->pic_reg));
3476#endif /* AOF_ASSEMBLER */
3477}
3478
3479
3480/* Return nonzero if X is valid as an ARM state addressing register.  */
3481static int
3482arm_address_register_rtx_p (rtx x, int strict_p)
3483{
3484  int regno;
3485
3486  if (GET_CODE (x) != REG)
3487    return 0;
3488
3489  regno = REGNO (x);
3490
3491  if (strict_p)
3492    return ARM_REGNO_OK_FOR_BASE_P (regno);
3493
3494  return (regno <= LAST_ARM_REGNUM
3495	  || regno >= FIRST_PSEUDO_REGISTER
3496	  || regno == FRAME_POINTER_REGNUM
3497	  || regno == ARG_POINTER_REGNUM);
3498}
3499
3500/* Return TRUE if this rtx is the difference of a symbol and a label,
3501   and will reduce to a PC-relative relocation in the object file.
3502   Expressions like this can be left alone when generating PIC, rather
3503   than forced through the GOT.  */
3504static int
3505pcrel_constant_p (rtx x)
3506{
3507  if (GET_CODE (x) == MINUS)
3508    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3509
3510  return FALSE;
3511}
3512
3513/* Return nonzero if X is a valid ARM state address operand.  */
3514int
3515arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3516			  int strict_p)
3517{
3518  bool use_ldrd;
3519  enum rtx_code code = GET_CODE (x);
3520
3521  if (arm_address_register_rtx_p (x, strict_p))
3522    return 1;
3523
3524  use_ldrd = (TARGET_LDRD
3525	      && (mode == DImode
3526		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3527
3528  if (code == POST_INC || code == PRE_DEC
3529      || ((code == PRE_INC || code == POST_DEC)
3530	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3531    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3532
3533  else if ((code == POST_MODIFY || code == PRE_MODIFY)
3534	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3535	   && GET_CODE (XEXP (x, 1)) == PLUS
3536	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3537    {
3538      rtx addend = XEXP (XEXP (x, 1), 1);
3539
      /* Don't allow ldrd post increment by register because it's hard
	 to fix up invalid register choices.  */
3542      if (use_ldrd
3543	  && GET_CODE (x) == POST_MODIFY
3544	  && GET_CODE (addend) == REG)
3545	return 0;
3546
3547      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3548	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
3549    }
3550
3551  /* After reload constants split into minipools will have addresses
3552     from a LABEL_REF.  */
3553  else if (reload_completed
3554	   && (code == LABEL_REF
3555	       || (code == CONST
3556		   && GET_CODE (XEXP (x, 0)) == PLUS
3557		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3558		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3559    return 1;
3560
3561  else if (mode == TImode)
3562    return 0;
3563
3564  else if (code == PLUS)
3565    {
3566      rtx xop0 = XEXP (x, 0);
3567      rtx xop1 = XEXP (x, 1);
3568
3569      return ((arm_address_register_rtx_p (xop0, strict_p)
3570	       && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3571	      || (arm_address_register_rtx_p (xop1, strict_p)
3572		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3573    }
3574
3575#if 0
3576  /* Reload currently can't handle MINUS, so disable this for now */
3577  else if (GET_CODE (x) == MINUS)
3578    {
3579      rtx xop0 = XEXP (x, 0);
3580      rtx xop1 = XEXP (x, 1);
3581
3582      return (arm_address_register_rtx_p (xop0, strict_p)
3583	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3584    }
3585#endif
3586
3587  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3588	   && code == SYMBOL_REF
3589	   && CONSTANT_POOL_ADDRESS_P (x)
3590	   && ! (flag_pic
3591		 && symbol_mentioned_p (get_pool_constant (x))
3592		 && ! pcrel_constant_p (get_pool_constant (x))))
3593    return 1;
3594
3595  return 0;
3596}
3597
3598/* Return nonzero if INDEX is valid for an address index operand in
3599   ARM state.  */
3600static int
3601arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3602			int strict_p)
3603{
3604  HOST_WIDE_INT range;
3605  enum rtx_code code = GET_CODE (index);
3606
3607  /* Standard coprocessor addressing modes.  */
3608  if (TARGET_HARD_FLOAT
3609      && (TARGET_FPA || TARGET_MAVERICK)
3610      && (GET_MODE_CLASS (mode) == MODE_FLOAT
3611	  || (TARGET_MAVERICK && mode == DImode)))
3612    return (code == CONST_INT && INTVAL (index) < 1024
3613	    && INTVAL (index) > -1024
3614	    && (INTVAL (index) & 3) == 0);
3615
3616  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3617    {
3618      /* For DImode assume values will usually live in core regs
3619	 and only allow LDRD addressing modes.  */
3620      if (!TARGET_LDRD || mode != DImode)
3621	return (code == CONST_INT
3622		&& INTVAL (index) < 1024
3623		&& INTVAL (index) > -1024
3624		&& (INTVAL (index) & 3) == 0);
3625    }
3626
3627  if (arm_address_register_rtx_p (index, strict_p)
3628      && (GET_MODE_SIZE (mode) <= 4))
3629    return 1;
3630
3631  if (mode == DImode || mode == DFmode)
3632    {
3633      if (code == CONST_INT)
3634	{
3635	  HOST_WIDE_INT val = INTVAL (index);
3636
3637	  if (TARGET_LDRD)
3638	    return val > -256 && val < 256;
3639	  else
3640	    return val > -4096 && val < 4092;
3641	}
3642
3643      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3644    }
3645
3646  if (GET_MODE_SIZE (mode) <= 4
3647      && ! (arm_arch4
3648	    && (mode == HImode
3649		|| (mode == QImode && outer == SIGN_EXTEND))))
3650    {
3651      if (code == MULT)
3652	{
3653	  rtx xiop0 = XEXP (index, 0);
3654	  rtx xiop1 = XEXP (index, 1);
3655
3656	  return ((arm_address_register_rtx_p (xiop0, strict_p)
3657		   && power_of_two_operand (xiop1, SImode))
3658		  || (arm_address_register_rtx_p (xiop1, strict_p)
3659		      && power_of_two_operand (xiop0, SImode)));
3660	}
3661      else if (code == LSHIFTRT || code == ASHIFTRT
3662	       || code == ASHIFT || code == ROTATERT)
3663	{
3664	  rtx op = XEXP (index, 1);
3665
3666	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3667		  && GET_CODE (op) == CONST_INT
3668		  && INTVAL (op) > 0
3669		  && INTVAL (op) <= 31);
3670	}
3671    }
3672
3673  /* For ARM v4 we may be doing a sign-extend operation during the
3674     load.  */
3675  if (arm_arch4)
3676    {
3677      if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3678	range = 256;
3679      else
3680	range = 4096;
3681    }
3682  else
3683    range = (mode == HImode) ? 4095 : 4096;
3684
3685  return (code == CONST_INT
3686	  && INTVAL (index) < range
3687	  && INTVAL (index) > -range);
3688}
3689
3690/* Return nonzero if X is valid as a Thumb state base register.  */
3691static int
3692thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3693{
3694  int regno;
3695
3696  if (GET_CODE (x) != REG)
3697    return 0;
3698
3699  regno = REGNO (x);
3700
3701  if (strict_p)
3702    return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3703
3704  return (regno <= LAST_LO_REGNUM
3705	  || regno > LAST_VIRTUAL_REGISTER
3706	  || regno == FRAME_POINTER_REGNUM
3707	  || (GET_MODE_SIZE (mode) >= 4
3708	      && (regno == STACK_POINTER_REGNUM
3709		  || regno >= FIRST_PSEUDO_REGISTER
3710		  || x == hard_frame_pointer_rtx
3711		  || x == arg_pointer_rtx)));
3712}
3713
/* Return nonzero if X is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
3716inline static int
3717thumb_index_register_rtx_p (rtx x, int strict_p)
3718{
3719  return thumb_base_register_rtx_p (x, QImode, strict_p);
3720}
3721
/* Return nonzero if X is a legitimate Thumb-state address.
3723
3724   The AP may be eliminated to either the SP or the FP, so we use the
3725   least common denominator, e.g. SImode, and offsets from 0 to 64.
3726
3727   ??? Verify whether the above is the right approach.
3728
3729   ??? Also, the FP may be eliminated to the SP, so perhaps that
3730   needs special handling also.
3731
3732   ??? Look at how the mips16 port solves this problem.  It probably uses
3733   better ways to solve some of these problems.
3734
3735   Although it is not incorrect, we don't accept QImode and HImode
3736   addresses based on the frame pointer or arg pointer until the
3737   reload pass starts.  This is so that eliminating such addresses
3738   into stack based ones won't produce impossible code.  */
3739int
3740thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3741{
3742  /* ??? Not clear if this is right.  Experiment.  */
3743  if (GET_MODE_SIZE (mode) < 4
3744      && !(reload_in_progress || reload_completed)
3745      && (reg_mentioned_p (frame_pointer_rtx, x)
3746	  || reg_mentioned_p (arg_pointer_rtx, x)
3747	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
3748	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3749	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3750	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3751    return 0;
3752
3753  /* Accept any base register.  SP only in SImode or larger.  */
3754  else if (thumb_base_register_rtx_p (x, mode, strict_p))
3755    return 1;
3756
3757  /* This is PC relative data before arm_reorg runs.  */
3758  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3759	   && GET_CODE (x) == SYMBOL_REF
3760           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
3761    return 1;
3762
3763  /* This is PC relative data after arm_reorg runs.  */
3764  else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3765	   && (GET_CODE (x) == LABEL_REF
3766	       || (GET_CODE (x) == CONST
3767		   && GET_CODE (XEXP (x, 0)) == PLUS
3768		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3769		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3770    return 1;
3771
3772  /* Post-inc indexing only supported for SImode and larger.  */
3773  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3774	   && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3775    return 1;
3776
3777  else if (GET_CODE (x) == PLUS)
3778    {
3779      /* REG+REG address can be any two index registers.  */
3780      /* We disallow FRAME+REG addressing since we know that FRAME
3781	 will be replaced with STACK, and SP relative addressing only
3782	 permits SP+OFFSET.  */
3783      if (GET_MODE_SIZE (mode) <= 4
3784	  && XEXP (x, 0) != frame_pointer_rtx
3785	  && XEXP (x, 1) != frame_pointer_rtx
3786	  && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3787	  && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3788	return 1;
3789
3790      /* REG+const has 5-7 bit offset for non-SP registers.  */
3791      else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3792		|| XEXP (x, 0) == arg_pointer_rtx)
3793	       && GET_CODE (XEXP (x, 1)) == CONST_INT
3794	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3795	return 1;
3796
3797      /* REG+const has 10 bit offset for SP, but only SImode and
3798	 larger is supported.  */
3799      /* ??? Should probably check for DI/DFmode overflow here
3800	 just like GO_IF_LEGITIMATE_OFFSET does.  */
3801      else if (GET_CODE (XEXP (x, 0)) == REG
3802	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3803	       && GET_MODE_SIZE (mode) >= 4
3804	       && GET_CODE (XEXP (x, 1)) == CONST_INT
3805	       && INTVAL (XEXP (x, 1)) >= 0
3806	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3807	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
3808	return 1;
3809
3810      else if (GET_CODE (XEXP (x, 0)) == REG
3811	       && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3812	       && GET_MODE_SIZE (mode) >= 4
3813	       && GET_CODE (XEXP (x, 1)) == CONST_INT
3814	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
3815	return 1;
3816    }
3817
3818  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3819	   && GET_MODE_SIZE (mode) == 4
3820	   && GET_CODE (x) == SYMBOL_REF
3821	   && CONSTANT_POOL_ADDRESS_P (x)
3822	   && ! (flag_pic
3823		 && symbol_mentioned_p (get_pool_constant (x))
3824		 && ! pcrel_constant_p (get_pool_constant (x))))
3825    return 1;
3826
3827  return 0;
3828}
3829
3830/* Return nonzero if VAL can be used as an offset in a Thumb-state address
3831   instruction of mode MODE.  */
3832int
3833thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3834{
3835  switch (GET_MODE_SIZE (mode))
3836    {
3837    case 1:
3838      return val >= 0 && val < 32;
3839
3840    case 2:
3841      return val >= 0 && val < 64 && (val & 1) == 0;
3842
3843    default:
3844      return (val >= 0
3845	      && (val + GET_MODE_SIZE (mode)) <= 128
3846	      && (val & 3) == 0);
3847    }
3848}
3849
3850/* Build the SYMBOL_REF for __tls_get_addr.  */
3851
3852static GTY(()) rtx tls_get_addr_libfunc;
3853
3854static rtx
3855get_tls_get_addr (void)
3856{
3857  if (!tls_get_addr_libfunc)
3858    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
3859  return tls_get_addr_libfunc;
3860}
3861
3862static rtx
3863arm_load_tp (rtx target)
3864{
3865  if (!target)
3866    target = gen_reg_rtx (SImode);
3867
3868  if (TARGET_HARD_TP)
3869    {
3870      /* Can return in any reg.  */
3871      emit_insn (gen_load_tp_hard (target));
3872    }
3873  else
3874    {
3875      /* Always returned in r0.  Immediately copy the result into a pseudo,
3876	 otherwise other uses of r0 (e.g. setting up function arguments) may
3877	 clobber the value.  */
3878
3879      rtx tmp;
3880
3881      emit_insn (gen_load_tp_soft ());
3882
3883      tmp = gen_rtx_REG (SImode, 0);
3884      emit_move_insn (target, tmp);
3885    }
3886  return target;
3887}
3888
3889static rtx
3890load_tls_operand (rtx x, rtx reg)
3891{
3892  rtx tmp;
3893
3894  if (reg == NULL_RTX)
3895    reg = gen_reg_rtx (SImode);
3896
3897  tmp = gen_rtx_CONST (SImode, x);
3898
3899  emit_move_insn (reg, tmp);
3900
3901  return reg;
3902}
3903
3904static rtx
3905arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
3906{
3907  rtx insns, label, labelno, sum;
3908
3909  start_sequence ();
3910
3911  labelno = GEN_INT (pic_labelno++);
3912  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3913  label = gen_rtx_CONST (VOIDmode, label);
3914
3915  sum = gen_rtx_UNSPEC (Pmode,
3916			gen_rtvec (4, x, GEN_INT (reloc), label,
3917				   GEN_INT (TARGET_ARM ? 8 : 4)),
3918			UNSPEC_TLS);
3919  reg = load_tls_operand (sum, reg);
3920
3921  if (TARGET_ARM)
3922    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
3923  else
3924    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
3925
  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);
3928
3929  insns = get_insns ();
3930  end_sequence ();
3931
3932  return insns;
3933}
3934
3935rtx
3936legitimize_tls_address (rtx x, rtx reg)
3937{
3938  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
3939  unsigned int model = SYMBOL_REF_TLS_MODEL (x);
3940
3941  switch (model)
3942    {
3943    case TLS_MODEL_GLOBAL_DYNAMIC:
3944      insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
3945      dest = gen_reg_rtx (Pmode);
3946      emit_libcall_block (insns, dest, ret, x);
3947      return dest;
3948
3949    case TLS_MODEL_LOCAL_DYNAMIC:
3950      insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
3951
3952      /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
3953	 share the LDM result with other LD model accesses.  */
3954      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
3955			    UNSPEC_TLS);
3956      dest = gen_reg_rtx (Pmode);
3957      emit_libcall_block (insns, dest, ret, eqv);
3958
3959      /* Load the addend.  */
3960      addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
3961			       UNSPEC_TLS);
3962      addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
3963      return gen_rtx_PLUS (Pmode, dest, addend);
3964
3965    case TLS_MODEL_INITIAL_EXEC:
3966      labelno = GEN_INT (pic_labelno++);
3967      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3968      label = gen_rtx_CONST (VOIDmode, label);
3969      sum = gen_rtx_UNSPEC (Pmode,
3970			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
3971				       GEN_INT (TARGET_ARM ? 8 : 4)),
3972			    UNSPEC_TLS);
3973      reg = load_tls_operand (sum, reg);
3974
3975      if (TARGET_ARM)
3976	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
3977      else
3978	{
3979	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
3980	  emit_move_insn (reg, gen_const_mem (SImode, reg));
3981	}
3982
3983      tp = arm_load_tp (NULL_RTX);
3984
3985      return gen_rtx_PLUS (Pmode, tp, reg);
3986
3987    case TLS_MODEL_LOCAL_EXEC:
3988      tp = arm_load_tp (NULL_RTX);
3989
3990      reg = gen_rtx_UNSPEC (Pmode,
3991			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
3992			    UNSPEC_TLS);
3993      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
3994
3995      return gen_rtx_PLUS (Pmode, tp, reg);
3996
    default:
      gcc_unreachable ();
3999    }
4000}
4001
4002/* Try machine-dependent ways of modifying an illegitimate address
4003   to be legitimate.  If we find one, return the new, valid address.  */
4004rtx
4005arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4006{
4007  if (arm_tls_symbol_p (x))
4008    return legitimize_tls_address (x, NULL_RTX);
4009
4010  if (GET_CODE (x) == PLUS)
4011    {
4012      rtx xop0 = XEXP (x, 0);
4013      rtx xop1 = XEXP (x, 1);
4014
4015      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4016	xop0 = force_reg (SImode, xop0);
4017
4018      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4019	xop1 = force_reg (SImode, xop1);
4020
4021      if (ARM_BASE_REGISTER_RTX_P (xop0)
4022	  && GET_CODE (xop1) == CONST_INT)
4023	{
4024	  HOST_WIDE_INT n, low_n;
4025	  rtx base_reg, val;
4026	  n = INTVAL (xop1);
4027
4028	  /* VFP addressing modes actually allow greater offsets, but for
4029	     now we just stick with the lowest common denominator.  */
4030	  if (mode == DImode
4031	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4032	    {
4033	      low_n = n & 0x0f;
4034	      n &= ~0x0f;
4035	      if (low_n > 4)
4036		{
4037		  n += 16;
4038		  low_n -= 16;
4039		}
4040	    }
4041	  else
4042	    {
4043	      low_n = ((mode) == TImode ? 0
4044		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4045	      n -= low_n;
4046	    }
4047
4048	  base_reg = gen_reg_rtx (SImode);
4049	  val = force_operand (plus_constant (xop0, n), NULL_RTX);
4050	  emit_move_insn (base_reg, val);
4051	  x = plus_constant (base_reg, low_n);
4052	}
4053      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4054	x = gen_rtx_PLUS (SImode, xop0, xop1);
4055    }
4056
4057  /* XXX We don't allow MINUS any more -- see comment in
4058     arm_legitimate_address_p ().  */
4059  else if (GET_CODE (x) == MINUS)
4060    {
4061      rtx xop0 = XEXP (x, 0);
4062      rtx xop1 = XEXP (x, 1);
4063
4064      if (CONSTANT_P (xop0))
4065	xop0 = force_reg (SImode, xop0);
4066
4067      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4068	xop1 = force_reg (SImode, xop1);
4069
4070      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4071	x = gen_rtx_MINUS (SImode, xop0, xop1);
4072    }
4073
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses, which often allows the base register to be
     shared between multiple adjacent memory references, and may even
     allow the minipool to be avoided entirely.  */
4078  else if (GET_CODE (x) == CONST_INT && optimize > 0)
4079    {
4080      unsigned int bits;
4081      HOST_WIDE_INT mask, base, index;
4082      rtx base_reg;
4083
      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
4087      bits = (mode == SImode) ? 12 : 8;
4088      mask = (1 << bits) - 1;
4089      base = INTVAL (x) & ~mask;
4090      index = INTVAL (x) & mask;
4091      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4092        {
4093	  /* It'll most probably be more efficient to generate the base
4094	     with more bits set and use a negative index instead. */
4095	  base |= mask;
4096	  index -= mask;
4097	}
4098      base_reg = force_reg (SImode, GEN_INT (base));
4099      x = plus_constant (base_reg, index);
4100    }
4101
4102  if (flag_pic)
4103    {
4104      /* We need to find and carefully transform any SYMBOL and LABEL
4105	 references; so go back to the original address expression.  */
4106      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4107
4108      if (new_x != orig_x)
4109	x = new_x;
4110    }
4111
4112  return x;
4113}
4114
4115
4116/* Try machine-dependent ways of modifying an illegitimate Thumb address
4117   to be legitimate.  If we find one, return the new, valid address.  */
4118rtx
4119thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4120{
4121  if (arm_tls_symbol_p (x))
4122    return legitimize_tls_address (x, NULL_RTX);
4123
4124  if (GET_CODE (x) == PLUS
4125      && GET_CODE (XEXP (x, 1)) == CONST_INT
4126      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4127	  || INTVAL (XEXP (x, 1)) < 0))
4128    {
4129      rtx xop0 = XEXP (x, 0);
4130      rtx xop1 = XEXP (x, 1);
4131      HOST_WIDE_INT offset = INTVAL (xop1);
4132
      /* Try to fold the offset into a biasing of the base register and
	 then offset that.  Only do this when optimizing for space; when
	 optimizing for speed the constant is better forced into a
	 register, where CSE can share it between references.  */
4136      if (optimize_size && offset >= 0
4137	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
4138	{
4139	  HOST_WIDE_INT delta;
4140
4141	  if (offset >= 256)
4142	    delta = offset - (256 - GET_MODE_SIZE (mode));
4143	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4144	    delta = 31 * GET_MODE_SIZE (mode);
4145	  else
4146	    delta = offset & (~31 * GET_MODE_SIZE (mode));
4147
4148	  xop0 = force_operand (plus_constant (xop0, offset - delta),
4149				NULL_RTX);
4150	  x = plus_constant (xop0, delta);
4151	}
4152      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference; forcing these into a register normally takes two
	   instructions.  */
4156	x = force_operand (x, NULL_RTX);
4157      else
4158	{
4159	  /* For the remaining cases, force the constant into a register.  */
4160	  xop1 = force_reg (SImode, xop1);
4161	  x = gen_rtx_PLUS (SImode, xop0, xop1);
4162	}
4163    }
4164  else if (GET_CODE (x) == PLUS
4165	   && s_register_operand (XEXP (x, 1), SImode)
4166	   && !s_register_operand (XEXP (x, 0), SImode))
4167    {
4168      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4169
4170      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4171    }
4172
4173  if (flag_pic)
4174    {
4175      /* We need to find and carefully transform any SYMBOL and LABEL
4176	 references; so go back to the original address expression.  */
4177      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4178
4179      if (new_x != orig_x)
4180	x = new_x;
4181    }
4182
4183  return x;
4184}
4185
4186rtx
4187thumb_legitimize_reload_address (rtx *x_p,
4188				 enum machine_mode mode,
4189				 int opnum, int type,
4190				 int ind_levels ATTRIBUTE_UNUSED)
4191{
4192  rtx x = *x_p;
4193
4194  if (GET_CODE (x) == PLUS
4195      && GET_MODE_SIZE (mode) < 4
4196      && REG_P (XEXP (x, 0))
4197      && XEXP (x, 0) == stack_pointer_rtx
4198      && GET_CODE (XEXP (x, 1)) == CONST_INT
4199      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4200    {
4201      rtx orig_x = x;
4202
4203      x = copy_rtx (x);
4204      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4205		   Pmode, VOIDmode, 0, 0, opnum, type);
4206      return x;
4207    }
4208
4209  /* If both registers are hi-regs, then it's better to reload the
4210     entire expression rather than each register individually.  That
4211     only requires one reload register rather than two.  */
4212  if (GET_CODE (x) == PLUS
4213      && REG_P (XEXP (x, 0))
4214      && REG_P (XEXP (x, 1))
4215      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4216      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4217    {
4218      rtx orig_x = x;
4219
4220      x = copy_rtx (x);
4221      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4222		   Pmode, VOIDmode, 0, 0, opnum, type);
4223      return x;
4224    }
4225
4226  return NULL;
4227}
4228
4229/* Test for various thread-local symbols.  */
4230
4231/* Return TRUE if X is a thread-local symbol.  */
4232
4233static bool
4234arm_tls_symbol_p (rtx x)
4235{
4236  if (! TARGET_HAVE_TLS)
4237    return false;
4238
4239  if (GET_CODE (x) != SYMBOL_REF)
4240    return false;
4241
4242  return SYMBOL_REF_TLS_MODEL (x) != 0;
4243}
4244
4245/* Helper for arm_tls_referenced_p.  */
4246
4247static int
4248arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4249{
4250  if (GET_CODE (*x) == SYMBOL_REF)
4251    return SYMBOL_REF_TLS_MODEL (*x) != 0;
4252
4253  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4254     TLS offsets, not real symbol references.  */
4255  if (GET_CODE (*x) == UNSPEC
4256      && XINT (*x, 1) == UNSPEC_TLS)
4257    return -1;
4258
4259  return 0;
4260}
4261
4262/* Return TRUE if X contains any TLS symbol references.  */
4263
4264bool
4265arm_tls_referenced_p (rtx x)
4266{
4267  if (! TARGET_HAVE_TLS)
4268    return false;
4269
4270  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4271}
4272
4273#define REG_OR_SUBREG_REG(X)						\
4274  (GET_CODE (X) == REG							\
4275   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4276
4277#define REG_OR_SUBREG_RTX(X)			\
4278   (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4279
4280#ifndef COSTS_N_INSNS
4281#define COSTS_N_INSNS(N) ((N) * 4 - 2)
4282#endif
4283static inline int
4284thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4285{
4286  enum machine_mode mode = GET_MODE (x);
4287
4288  switch (code)
4289    {
4290    case ASHIFT:
4291    case ASHIFTRT:
4292    case LSHIFTRT:
4293    case ROTATERT:
4294    case PLUS:
4295    case MINUS:
4296    case COMPARE:
4297    case NEG:
4298    case NOT:
4299      return COSTS_N_INSNS (1);
4300
4301    case MULT:
4302      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4303	{
4304	  int cycles = 0;
4305	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4306
4307	  while (i)
4308	    {
4309	      i >>= 2;
4310	      cycles++;
4311	    }
4312	  return COSTS_N_INSNS (2) + cycles;
4313	}
4314      return COSTS_N_INSNS (1) + 16;
4315
4316    case SET:
4317      return (COSTS_N_INSNS (1)
	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
		     + (GET_CODE (SET_DEST (x)) == MEM)));
4320
4321    case CONST_INT:
4322      if (outer == SET)
4323	{
4324	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4325	    return 0;
4326	  if (thumb_shiftable_const (INTVAL (x)))
4327	    return COSTS_N_INSNS (2);
4328	  return COSTS_N_INSNS (3);
4329	}
4330      else if ((outer == PLUS || outer == COMPARE)
4331	       && INTVAL (x) < 256 && INTVAL (x) > -256)
4332	return 0;
4333      else if (outer == AND
4334	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
4335	return COSTS_N_INSNS (1);
4336      else if (outer == ASHIFT || outer == ASHIFTRT
4337	       || outer == LSHIFTRT)
4338	return 0;
4339      return COSTS_N_INSNS (2);
4340
4341    case CONST:
4342    case CONST_DOUBLE:
4343    case LABEL_REF:
4344    case SYMBOL_REF:
4345      return COSTS_N_INSNS (3);
4346
4347    case UDIV:
4348    case UMOD:
4349    case DIV:
4350    case MOD:
4351      return 100;
4352
4353    case TRUNCATE:
4354      return 99;
4355
4356    case AND:
4357    case XOR:
4358    case IOR:
4359      /* XXX guess.  */
4360      return 8;
4361
4362    case MEM:
4363      /* XXX another guess.  */
4364      /* Memory costs quite a lot for the first word, but subsequent words
4365	 load at the equivalent of a single insn each.  */
4366      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4367	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4368		 ? 4 : 0));
4369
4370    case IF_THEN_ELSE:
4371      /* XXX a guess.  */
4372      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4373	return 14;
4374      return 2;
4375
4376    case ZERO_EXTEND:
4377      /* XXX still guessing.  */
4378      switch (GET_MODE (XEXP (x, 0)))
4379	{
4380	case QImode:
4381	  return (1 + (mode == DImode ? 4 : 0)
4382		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4383
4384	case HImode:
4385	  return (4 + (mode == DImode ? 4 : 0)
4386		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4387
4388	case SImode:
4389	  return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4390
4391	default:
4392	  return 99;
4393	}
4394
4395    default:
4396      return 99;
4397    }
4398}
4399
4400
4401/* Worker routine for arm_rtx_costs.  */
4402static inline int
4403arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4404{
4405  enum machine_mode mode = GET_MODE (x);
4406  enum rtx_code subcode;
4407  int extra_cost;
4408
4409  switch (code)
4410    {
4411    case MEM:
4412      /* Memory costs quite a lot for the first word, but subsequent words
4413	 load at the equivalent of a single insn each.  */
4414      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4415	      + (GET_CODE (x) == SYMBOL_REF
4416		 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4417
4418    case DIV:
4419    case MOD:
4420    case UDIV:
4421    case UMOD:
4422      return optimize_size ? COSTS_N_INSNS (2) : 100;
4423
4424    case ROTATE:
4425      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4426	return 4;
4427      /* Fall through */
4428    case ROTATERT:
4429      if (mode != SImode)
4430	return 8;
4431      /* Fall through */
4432    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4433      if (mode == DImode)
4434	return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4435		+ ((GET_CODE (XEXP (x, 0)) == REG
4436		    || (GET_CODE (XEXP (x, 0)) == SUBREG
4437			&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4438		   ? 0 : 8));
4439      return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4440		    || (GET_CODE (XEXP (x, 0)) == SUBREG
4441			&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4442		   ? 0 : 4)
4443	      + ((GET_CODE (XEXP (x, 1)) == REG
4444		  || (GET_CODE (XEXP (x, 1)) == SUBREG
4445		      && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4446		  || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4447		 ? 0 : 4));
4448
4449    case MINUS:
4450      if (mode == DImode)
4451	return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4452		+ ((REG_OR_SUBREG_REG (XEXP (x, 0))
4453		    || (GET_CODE (XEXP (x, 0)) == CONST_INT
4454		       && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4455		   ? 0 : 8));
4456
4457      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4458	return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4459		      || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4460			  && arm_const_double_rtx (XEXP (x, 1))))
4461		     ? 0 : 8)
4462		+ ((REG_OR_SUBREG_REG (XEXP (x, 0))
4463		    || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4464			&& arm_const_double_rtx (XEXP (x, 0))))
4465		   ? 0 : 8));
4466
4467      if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4468	    && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4469	    && REG_OR_SUBREG_REG (XEXP (x, 1))))
4470	  || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4471	       || subcode == ASHIFTRT || subcode == LSHIFTRT
4472	       || subcode == ROTATE || subcode == ROTATERT
4473	       || (subcode == MULT
4474		   && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4475		   && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4476			(INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4477	      && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4478	      && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4479		  || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4480	      && REG_OR_SUBREG_REG (XEXP (x, 0))))
4481	return 1;
4482      /* Fall through */
4483
4484    case PLUS:
4485      if (GET_CODE (XEXP (x, 0)) == MULT)
4486	{
4487	  extra_cost = rtx_cost (XEXP (x, 0), code);
4488	  if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4489	    extra_cost += 4 * ARM_NUM_REGS (mode);
4490	  return extra_cost;
4491	}
4492
4493      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4494	return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4495		+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
4496		    || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4497			&& arm_const_double_rtx (XEXP (x, 1))))
4498		   ? 0 : 8));
4499
4500      /* Fall through */
4501    case AND: case XOR: case IOR:
4502      extra_cost = 0;
4503
      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if ((REG_OR_SUBREG_REG (XEXP (x, 0))
	   && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	   && GET_CODE (XEXP (x, 1)) != CONST_INT)
	  || (REG_OR_SUBREG_REG (XEXP (x, 1))
	      && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
4514	extra_cost = 4;
4515
4516      if (mode == DImode)
4517	return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4518		+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
4519		    || (GET_CODE (XEXP (x, 1)) == CONST_INT
4520			&& const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4521		   ? 0 : 8));
4522
4523      if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4524	return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4525		+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
4526		    || (GET_CODE (XEXP (x, 1)) == CONST_INT
4527			&& const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4528		   ? 0 : 4));
4529
4530      else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4531	return (1 + extra_cost
4532		+ ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4533		     || subcode == LSHIFTRT || subcode == ASHIFTRT
4534		     || subcode == ROTATE || subcode == ROTATERT
4535		     || (subcode == MULT
4536			 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4537			 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4538			      (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4539		    && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4540		    && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4541			|| GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4542		   ? 0 : 4));
4543
4544      return 8;
4545
4546    case MULT:
4547      /* This should have been handled by the CPU specific routines.  */
4548      gcc_unreachable ();
4549
4550    case TRUNCATE:
4551      if (arm_arch3m && mode == SImode
4552	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4553	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4554	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4555	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4556	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4557	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4558	return 8;
4559      return 99;
4560
4561    case NEG:
4562      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4563	return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4564      /* Fall through */
4565    case NOT:
4566      if (mode == DImode)
4567	return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4568
4569      return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4570
4571    case IF_THEN_ELSE:
4572      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4573	return 14;
4574      return 2;
4575
4576    case COMPARE:
4577      return 1;
4578
4579    case ABS:
4580      return 4 + (mode == DImode ? 4 : 0);
4581
4582    case SIGN_EXTEND:
4583      if (GET_MODE (XEXP (x, 0)) == QImode)
4584	return (4 + (mode == DImode ? 4 : 0)
4585		+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4586      /* Fall through */
4587    case ZERO_EXTEND:
4588      switch (GET_MODE (XEXP (x, 0)))
4589	{
4590	case QImode:
4591	  return (1 + (mode == DImode ? 4 : 0)
4592		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4593
4594	case HImode:
4595	  return (4 + (mode == DImode ? 4 : 0)
4596		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4597
4598	case SImode:
4599	  return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4600
4601	case V8QImode:
4602	case V4HImode:
4603	case V2SImode:
4604	case V4QImode:
4605	case V2HImode:
4606	    return 1;
4607
4608	default:
4609	  gcc_unreachable ();
4610	}
4611      gcc_unreachable ();
4612
4613    case CONST_INT:
4614      if (const_ok_for_arm (INTVAL (x)))
4615	return outer == SET ? 2 : -1;
4616      else if (outer == AND
4617	       && const_ok_for_arm (~INTVAL (x)))
4618	return -1;
4619      else if ((outer == COMPARE
4620		|| outer == PLUS || outer == MINUS)
4621	       && const_ok_for_arm (-INTVAL (x)))
4622	return -1;
4623      else
4624	return 5;
4625
4626    case CONST:
4627    case LABEL_REF:
4628    case SYMBOL_REF:
4629      return 6;
4630
4631    case CONST_DOUBLE:
4632      if (arm_const_double_rtx (x))
4633	return outer == SET ? 2 : -1;
4634      else if ((outer == COMPARE || outer == PLUS)
4635	       && neg_const_double_rtx_ok_for_fpa (x))
4636	return -1;
4637      return 7;
4638
4639    default:
4640      return 99;
4641    }
4642}
4643
4644/* RTX costs when optimizing for size.  */
4645static bool
4646arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4647{
4648  enum machine_mode mode = GET_MODE (x);
4649
4650  if (TARGET_THUMB)
4651    {
4652      /* XXX TBD.  For now, use the standard costs.  */
4653      *total = thumb_rtx_costs (x, code, outer_code);
4654      return true;
4655    }
4656
4657  switch (code)
4658    {
4659    case MEM:
4660      /* A memory access costs 1 insn if the mode is small, or the address is
4661	 a single register, otherwise it costs one insn per word.  */
4662      if (REG_P (XEXP (x, 0)))
4663	*total = COSTS_N_INSNS (1);
4664      else
4665	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4666      return true;
4667
4668    case DIV:
4669    case MOD:
4670    case UDIV:
4671    case UMOD:
4672      /* Needs a libcall, so it costs about this.  */
4673      *total = COSTS_N_INSNS (2);
4674      return false;
4675
4676    case ROTATE:
4677      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4678	{
4679	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4680	  return true;
4681	}
4682      /* Fall through */
4683    case ROTATERT:
4684    case ASHIFT:
4685    case LSHIFTRT:
4686    case ASHIFTRT:
4687      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4688	{
4689	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4690	  return true;
4691	}
4692      else if (mode == SImode)
4693	{
4694	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4695	  /* Slightly disparage register shifts, but not by much.  */
4696	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4697	    *total += 1 + rtx_cost (XEXP (x, 1), code);
4698	  return true;
4699	}
4700
4701      /* Needs a libcall.  */
4702      *total = COSTS_N_INSNS (2);
4703      return false;
4704
4705    case MINUS:
4706      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4707	{
4708	  *total = COSTS_N_INSNS (1);
4709	  return false;
4710	}
4711
4712      if (mode == SImode)
4713	{
4714	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4715	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4716
4717	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4718	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4719	      || subcode1 == ROTATE || subcode1 == ROTATERT
4720	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4721	      || subcode1 == ASHIFTRT)
4722	    {
4723	      /* It's just the cost of the two operands.  */
4724	      *total = 0;
4725	      return false;
4726	    }
4727
4728	  *total = COSTS_N_INSNS (1);
4729	  return false;
4730	}
4731
4732      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4733      return false;
4734
4735    case PLUS:
4736      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4737	{
4738	  *total = COSTS_N_INSNS (1);
4739	  return false;
4740	}
4741
4742      /* Fall through */
4743    case AND: case XOR: case IOR:
4744      if (mode == SImode)
4745	{
4746	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4747
4748	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4749	      || subcode == LSHIFTRT || subcode == ASHIFTRT
4750	      || (code == AND && subcode == NOT))
4751	    {
4752	      /* It's just the cost of the two operands.  */
4753	      *total = 0;
4754	      return false;
4755	    }
4756	}
4757
4758      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4759      return false;
4760
4761    case MULT:
4762      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4763      return false;
4764
4765    case NEG:
4766      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4767	*total = COSTS_N_INSNS (1);
4768      /* Fall through */
4769    case NOT:
4770      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4771
4772      return false;
4773
4774    case IF_THEN_ELSE:
4775      *total = 0;
4776      return false;
4777
4778    case COMPARE:
4779      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
4781      else
4782	*total = COSTS_N_INSNS (1);
4783      return false;
4784
4785    case ABS:
4786      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4787	*total = COSTS_N_INSNS (1);
4788      else
4789	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4790      return false;
4791
4792    case SIGN_EXTEND:
4793      *total = 0;
4794      if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4795	{
4796	  if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4797	    *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4798	}
4799      if (mode == DImode)
4800	*total += COSTS_N_INSNS (1);
4801      return false;
4802
4803    case ZERO_EXTEND:
4804      *total = 0;
4805      if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4806	{
4807	  switch (GET_MODE (XEXP (x, 0)))
4808	    {
4809	    case QImode:
4810	      *total += COSTS_N_INSNS (1);
4811	      break;
4812
4813	    case HImode:
	      *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
	      break;
4815
4816	    case SImode:
4817	      break;
4818
4819	    default:
4820	      *total += COSTS_N_INSNS (2);
4821	    }
4822	}
4823
4824      if (mode == DImode)
4825	*total += COSTS_N_INSNS (1);
4826
4827      return false;
4828
4829    case CONST_INT:
4830      if (const_ok_for_arm (INTVAL (x)))
4831	*total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4832      else if (const_ok_for_arm (~INTVAL (x)))
4833	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4834      else if (const_ok_for_arm (-INTVAL (x)))
4835	{
4836	  if (outer_code == COMPARE || outer_code == PLUS
4837	      || outer_code == MINUS)
4838	    *total = 0;
4839	  else
4840	    *total = COSTS_N_INSNS (1);
4841	}
4842      else
4843	*total = COSTS_N_INSNS (2);
4844      return true;
4845
4846    case CONST:
4847    case LABEL_REF:
4848    case SYMBOL_REF:
4849      *total = COSTS_N_INSNS (2);
4850      return true;
4851
4852    case CONST_DOUBLE:
4853      *total = COSTS_N_INSNS (4);
4854      return true;
4855
4856    default:
4857      if (mode != VOIDmode)
4858	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4859      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
4861      return false;
4862    }
4863}
4864
4865/* RTX costs for cores with a slow MUL implementation.  */
4866
4867static bool
4868arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4869{
4870  enum machine_mode mode = GET_MODE (x);
4871
4872  if (TARGET_THUMB)
4873    {
4874      *total = thumb_rtx_costs (x, code, outer_code);
4875      return true;
4876    }
4877
4878  switch (code)
4879    {
4880    case MULT:
4881      if (GET_MODE_CLASS (mode) == MODE_FLOAT
4882	  || mode == DImode)
4883	{
4884	  *total = 30;
4885	  return true;
4886	}
4887
4888      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4889	{
4890	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4891				      & (unsigned HOST_WIDE_INT) 0xffffffff);
4892	  int cost, const_ok = const_ok_for_arm (i);
4893	  int j, booth_unit_size;
4894
4895	  /* Tune as appropriate.  */
4896	  cost = const_ok ? 4 : 8;
4897	  booth_unit_size = 2;
4898	  for (j = 0; i && j < 32; j += booth_unit_size)
4899	    {
4900	      i >>= booth_unit_size;
4901	      cost += 2;
4902	    }
4903
4904	  *total = cost;
4905	  return true;
4906	}
4907
4908      *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4909	          + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4910      return true;
4911
4912    default:
4913      *total = arm_rtx_costs_1 (x, code, outer_code);
4914      return true;
4915    }
4916}
4917
4918
4919/* RTX cost for cores with a fast multiply unit (M variants).  */
4920
4921static bool
4922arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4923{
4924  enum machine_mode mode = GET_MODE (x);
4925
4926  if (TARGET_THUMB)
4927    {
4928      *total = thumb_rtx_costs (x, code, outer_code);
4929      return true;
4930    }
4931
4932  switch (code)
4933    {
4934    case MULT:
4935      /* There is no point basing this on the tuning, since it is always the
4936	 fast variant if it exists at all.  */
4937      if (mode == DImode
4938	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4939	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4940	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4941	{
4942	  *total = 8;
4943	  return true;
4944	}
4945
4946
4947      if (GET_MODE_CLASS (mode) == MODE_FLOAT
4948	  || mode == DImode)
4949	{
4950	  *total = 30;
4951	  return true;
4952	}
4953
4954      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4955	{
4956	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4957				      & (unsigned HOST_WIDE_INT) 0xffffffff);
4958	  int cost, const_ok = const_ok_for_arm (i);
4959	  int j, booth_unit_size;
4960
4961	  /* Tune as appropriate.  */
4962	  cost = const_ok ? 4 : 8;
4963	  booth_unit_size = 8;
4964	  for (j = 0; i && j < 32; j += booth_unit_size)
4965	    {
4966	      i >>= booth_unit_size;
4967	      cost += 2;
4968	    }
4969
4970	  *total = cost;
4971	  return true;
4972	}
4973
4974      *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4975	         + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4976      return true;
4977
4978    default:
4979      *total = arm_rtx_costs_1 (x, code, outer_code);
4980      return true;
4981    }
4982}
4983
4984
4985/* RTX cost for XScale CPUs.  */
4986
4987static bool
4988arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4989{
4990  enum machine_mode mode = GET_MODE (x);
4991
4992  if (TARGET_THUMB)
4993    {
4994      *total = thumb_rtx_costs (x, code, outer_code);
4995      return true;
4996    }
4997
4998  switch (code)
4999    {
5000    case MULT:
5001      /* There is no point basing this on the tuning, since it is always the
5002	 fast variant if it exists at all.  */
5003      if (mode == DImode
5004	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5005	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5006	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5007	{
5008	  *total = 8;
5009	  return true;
5010	}
5011
5012
5013      if (GET_MODE_CLASS (mode) == MODE_FLOAT
5014	  || mode == DImode)
5015	{
5016	  *total = 30;
5017	  return true;
5018	}
5019
5020      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5021	{
5022	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5023				      & (unsigned HOST_WIDE_INT) 0xffffffff);
5024	  int cost, const_ok = const_ok_for_arm (i);
5025	  unsigned HOST_WIDE_INT masked_const;
5026
5027	  /* The cost will be related to two insns.
5028	     First a load of the constant (MOV or LDR), then a multiply.  */
5029	  cost = 2;
5030	  if (! const_ok)
5031	    cost += 1;      /* LDR is probably more expensive because
5032			       of longer result latency.  */
5033	  masked_const = i & 0xffff8000;
5034	  if (masked_const != 0 && masked_const != 0xffff8000)
5035	    {
5036	      masked_const = i & 0xf8000000;
5037	      if (masked_const == 0 || masked_const == 0xf8000000)
5038		cost += 1;
5039	      else
5040		cost += 2;
5041	    }
5042	  *total = cost;
5043	  return true;
5044	}
5045
5046      *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5047		 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5048      return true;
5049
5050    case COMPARE:
5051      /* A COMPARE of a MULT is slow on XScale; the muls instruction
5052	 will stall until the multiplication is complete.  */
5053      if (GET_CODE (XEXP (x, 0)) == MULT)
5054	*total = 4 + rtx_cost (XEXP (x, 0), code);
5055      else
5056	*total = arm_rtx_costs_1 (x, code, outer_code);
5057      return true;
5058
5059    default:
5060      *total = arm_rtx_costs_1 (x, code, outer_code);
5061      return true;
5062    }
5063}
5064
5065
5066/* RTX costs for 9e (and later) cores.  */
5067
5068static bool
5069arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5070{
5071  enum machine_mode mode = GET_MODE (x);
5072  int nonreg_cost;
5073  int cost;
5074
5075  if (TARGET_THUMB)
5076    {
5077      switch (code)
5078	{
5079	case MULT:
5080	  *total = COSTS_N_INSNS (3);
5081	  return true;
5082
5083	default:
5084	  *total = thumb_rtx_costs (x, code, outer_code);
5085	  return true;
5086	}
5087    }
5088
5089  switch (code)
5090    {
5091    case MULT:
5092      /* There is no point basing this on the tuning, since it is always the
5093	 fast variant if it exists at all.  */
5094      if (mode == DImode
5095	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5096	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5097	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5098	{
5099	  *total = 3;
5100	  return true;
5101	}
5102
5103
5104      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5105	{
5106	  *total = 30;
5107	  return true;
5108	}
5109      if (mode == DImode)
5110	{
5111	  cost = 7;
5112	  nonreg_cost = 8;
5113	}
5114      else
5115	{
5116	  cost = 2;
5117	  nonreg_cost = 4;
5118	}
5119
5120
5121      *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5122		    + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5123      return true;
5124
5125    default:
5126      *total = arm_rtx_costs_1 (x, code, outer_code);
5127      return true;
5128    }
5129}

/* All address computations that can be done are free, but rtx cost returns
5131   the same for practically all of them.  So we weight the different types
5132   of address here in the order (most pref first):
5133   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
5134static inline int
5135arm_arm_address_cost (rtx x)
5136{
5137  enum rtx_code c  = GET_CODE (x);
5138
5139  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5140    return 0;
5141  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5142    return 10;
5143
5144  if (c == PLUS || c == MINUS)
5145    {
5146      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5147	return 2;
5148
5149      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5150	return 3;
5151
5152      return 4;
5153    }
5154
5155  return 6;
5156}
5157
5158static inline int
5159arm_thumb_address_cost (rtx x)
5160{
5161  enum rtx_code c  = GET_CODE (x);
5162
5163  if (c == REG)
5164    return 1;
5165  if (c == PLUS
5166      && GET_CODE (XEXP (x, 0)) == REG
5167      && GET_CODE (XEXP (x, 1)) == CONST_INT)
5168    return 1;
5169
5170  return 2;
5171}
5172
5173static int
5174arm_address_cost (rtx x)
5175{
5176  return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5177}
5178
5179static int
5180arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5181{
5182  rtx i_pat, d_pat;
5183
5184  /* Some true dependencies can have a higher cost depending
5185     on precisely how certain input operands are used.  */
5186  if (arm_tune_xscale
5187      && REG_NOTE_KIND (link) == 0
5188      && recog_memoized (insn) >= 0
5189      && recog_memoized (dep) >= 0)
5190    {
5191      int shift_opnum = get_attr_shift (insn);
5192      enum attr_type attr_type = get_attr_type (dep);
5193
5194      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5195	 operand for INSN.  If we have a shifted input operand and the
5196	 instruction we depend on is another ALU instruction, then we may
5197	 have to account for an additional stall.  */
5198      if (shift_opnum != 0
5199	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5200	{
5201	  rtx shifted_operand;
5202	  int opno;
5203
5204	  /* Get the shifted operand.  */
5205	  extract_insn (insn);
5206	  shifted_operand = recog_data.operand[shift_opnum];
5207
5208	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase the
5210	     cost of this dependency.  */
5211	  extract_insn (dep);
5212	  preprocess_constraints ();
5213	  for (opno = 0; opno < recog_data.n_operands; opno++)
5214	    {
5215	      /* We can ignore strict inputs.  */
5216	      if (recog_data.operand_type[opno] == OP_IN)
5217		continue;
5218
5219	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
5220					   shifted_operand))
5221		return 2;
5222	    }
5223	}
5224    }
5225
5226  /* XXX This is not strictly true for the FPA.  */
5227  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5228      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5229    return 0;
5230
5231  /* Call insns don't incur a stall, even if they follow a load.  */
5232  if (REG_NOTE_KIND (link) == 0
5233      && GET_CODE (insn) == CALL_INSN)
5234    return 1;
5235
5236  if ((i_pat = single_set (insn)) != NULL
5237      && GET_CODE (SET_SRC (i_pat)) == MEM
5238      && (d_pat = single_set (dep)) != NULL
5239      && GET_CODE (SET_DEST (d_pat)) == MEM)
5240    {
5241      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5242      /* This is a load after a store, there is no conflict if the load reads
5243	 from a cached area.  Assume that loads from the stack, and from the
5244	 constant pool are cached, and that others will miss.  This is a
5245	 hack.  */
5246
5247      if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5248	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
5249	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
5250	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5251	return 1;
5252    }
5253
5254  return cost;
5255}
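
/* For example (illustrative, XScale): in the sequence

	mov	r1, r2, asl #3		@ ALU insn with a shift, sets r1
	add	r0, r0, r1, lsl #2	@ uses r1 as its shifted operand

   the second insn stalls on the first, so arm_adjust_cost raises the
   cost of the dependency to 2.  */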
5256
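/* Nonzero once VALUES_FP has been initialized; it also records how many
   entries are valid: 1 for VFP (only zero), 8 for FPA.  */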
5257static int fp_consts_inited = 0;
5258
5259/* Only zero is valid for VFP.  Other values are also valid for FPA.  */
5260static const char * const strings_fp[8] =
5261{
5262  "0",   "1",   "2",   "3",
5263  "4",   "5",   "0.5", "10"
5264};
5265
5266static REAL_VALUE_TYPE values_fp[8];
5267
5268static void
5269init_fp_table (void)
5270{
5271  int i;
5272  REAL_VALUE_TYPE r;
5273
5274  if (TARGET_VFP)
5275    fp_consts_inited = 1;
5276  else
5277    fp_consts_inited = 8;
5278
5279  for (i = 0; i < fp_consts_inited; i++)
5280    {
5281      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5282      values_fp[i] = r;
5283    }
5284}
5285
5286/* Return TRUE if rtx X is a valid immediate FP constant.  */
5287int
5288arm_const_double_rtx (rtx x)
5289{
5290  REAL_VALUE_TYPE r;
5291  int i;
5292
5293  if (!fp_consts_inited)
5294    init_fp_table ();
5295
5296  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5297  if (REAL_VALUE_MINUS_ZERO (r))
5298    return 0;
5299
5300  for (i = 0; i < fp_consts_inited; i++)
5301    if (REAL_VALUES_EQUAL (r, values_fp[i]))
5302      return 1;
5303
5304  return 0;
5305}
5306
/* Return TRUE if the negation of rtx X is a valid immediate FPA constant.  */
5308int
5309neg_const_double_rtx_ok_for_fpa (rtx x)
5310{
5311  REAL_VALUE_TYPE r;
5312  int i;
5313
5314  if (!fp_consts_inited)
5315    init_fp_table ();
5316
5317  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5318  r = REAL_VALUE_NEGATE (r);
5319  if (REAL_VALUE_MINUS_ZERO (r))
5320    return 0;
5321
5322  for (i = 0; i < 8; i++)
5323    if (REAL_VALUES_EQUAL (r, values_fp[i]))
5324      return 1;
5325
5326  return 0;
5327}
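
/* For example (illustrative), a CONST_DOUBLE of -5.0 satisfies the test
   above, since its negation 5.0 appears in the table; this lets patterns
   that can negate the constant use an FPA immediate form.  */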
5328
5329/* Predicates for `match_operand' and `match_operator'.  */
5330
5331/* Return nonzero if OP is a valid Cirrus memory address pattern.  */
5332int
5333cirrus_memory_offset (rtx op)
5334{
5335  /* Reject eliminable registers.  */
5336  if (! (reload_in_progress || reload_completed)
5337      && (   reg_mentioned_p (frame_pointer_rtx, op)
5338	  || reg_mentioned_p (arg_pointer_rtx, op)
5339	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
5340	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
5341	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
5342	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
5343    return 0;
5344
5345  if (GET_CODE (op) == MEM)
5346    {
5347      rtx ind;
5348
5349      ind = XEXP (op, 0);
5350
5351      /* Match: (mem (reg)).  */
5352      if (GET_CODE (ind) == REG)
5353	return 1;
5354
5355      /* Match:
5356	 (mem (plus (reg)
5357	            (const))).  */
5358      if (GET_CODE (ind) == PLUS
5359	  && GET_CODE (XEXP (ind, 0)) == REG
5360	  && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
5361	  && GET_CODE (XEXP (ind, 1)) == CONST_INT)
5362	return 1;
5363    }
5364
5365  return 0;
5366}
5367
5368/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if writeback address modes are allowed.  */
5370
5371int
5372arm_coproc_mem_operand (rtx op, bool wb)
5373{
5374  rtx ind;
5375
5376  /* Reject eliminable registers.  */
5377  if (! (reload_in_progress || reload_completed)
5378      && (   reg_mentioned_p (frame_pointer_rtx, op)
5379	  || reg_mentioned_p (arg_pointer_rtx, op)
5380	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
5381	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
5382	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
5383	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
5384    return FALSE;
5385
5386  /* Constants are converted into offsets from labels.  */
5387  if (GET_CODE (op) != MEM)
5388    return FALSE;
5389
5390  ind = XEXP (op, 0);
5391
5392  if (reload_completed
5393      && (GET_CODE (ind) == LABEL_REF
5394	  || (GET_CODE (ind) == CONST
5395	      && GET_CODE (XEXP (ind, 0)) == PLUS
5396	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
5397	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
5398    return TRUE;
5399
5400  /* Match: (mem (reg)).  */
5401  if (GET_CODE (ind) == REG)
5402    return arm_address_register_rtx_p (ind, 0);
5403
  /* Autoincrement addressing modes.  */
5405  if (wb
5406      && (GET_CODE (ind) == PRE_INC
5407	  || GET_CODE (ind) == POST_INC
5408	  || GET_CODE (ind) == PRE_DEC
5409	  || GET_CODE (ind) == POST_DEC))
5410    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
5411
5412  if (wb
5413      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
5414      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
5415      && GET_CODE (XEXP (ind, 1)) == PLUS
5416      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
5417    ind = XEXP (ind, 1);
5418
5419  /* Match:
5420     (plus (reg)
5421	   (const)).  */
5422  if (GET_CODE (ind) == PLUS
5423      && GET_CODE (XEXP (ind, 0)) == REG
5424      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
5425      && GET_CODE (XEXP (ind, 1)) == CONST_INT
5426      && INTVAL (XEXP (ind, 1)) > -1024
5427      && INTVAL (XEXP (ind, 1)) <  1024
5428      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
5429    return TRUE;
5430
5431  return FALSE;
5432}
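
/* For example (illustrative), arm_coproc_mem_operand accepts:

     (mem (reg rn))
     (mem (plus (reg rn) (const_int 8)))   -- word-aligned, -1024 < offset < 1024
     (mem (post_inc (reg rn)))             -- only when WB is true  */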
5433
5434/* Return true if X is a register that will be eliminated later on.  */
5435int
5436arm_eliminable_register (rtx x)
5437{
5438  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
5439		       || REGNO (x) == ARG_POINTER_REGNUM
5440		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
5441			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
5442}
5443
/* Return GENERAL_REGS if a scratch register is required to reload X to/from
5445   coprocessor registers.  Otherwise return NO_REGS.  */
5446
5447enum reg_class
5448coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
5449{
5450  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
5451    return NO_REGS;
5452
5453  return GENERAL_REGS;
5454}
5455
5456/* Values which must be returned in the most-significant end of the return
5457   register.  */
5458
5459static bool
5460arm_return_in_msb (tree valtype)
5461{
5462  return (TARGET_AAPCS_BASED
5463          && BYTES_BIG_ENDIAN
5464          && (AGGREGATE_TYPE_P (valtype)
5465              || TREE_CODE (valtype) == COMPLEX_TYPE));
5466}
5467
5468/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code which has to work around
5470   a hardware bug triggered by such instructions.  */
5471static bool
5472arm_memory_load_p (rtx insn)
5473{
  rtx body, lhs, rhs;
5475
5476  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5477    return false;
5478
5479  body = PATTERN (insn);
5480
5481  if (GET_CODE (body) != SET)
5482    return false;
5483
5484  lhs = XEXP (body, 0);
5485  rhs = XEXP (body, 1);
5486
5487  lhs = REG_OR_SUBREG_RTX (lhs);
5488
5489  /* If the destination is not a general purpose
5490     register we do not have to worry.  */
5491  if (GET_CODE (lhs) != REG
5492      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5493    return false;
5494
5495  /* As well as loads from memory we also have to react
5496     to loads of invalid constants which will be turned
5497     into loads from the minipool.  */
5498  return (GET_CODE (rhs) == MEM
5499	  || GET_CODE (rhs) == SYMBOL_REF
5500	  || note_invalid_constants (insn, -1, false));
5501}
5502
5503/* Return TRUE if INSN is a Cirrus instruction.  */
5504static bool
5505arm_cirrus_insn_p (rtx insn)
5506{
5507  enum attr_cirrus attr;
5508
5509  /* get_attr cannot accept USE or CLOBBER.  */
5510  if (!insn
5511      || GET_CODE (insn) != INSN
5512      || GET_CODE (PATTERN (insn)) == USE
5513      || GET_CODE (PATTERN (insn)) == CLOBBER)
5514    return 0;
5515
5516  attr = get_attr_cirrus (insn);
5517
5518  return attr != CIRRUS_NOT;
5519}
5520
5521/* Cirrus reorg for invalid instruction combinations.  */
5522static void
5523cirrus_reorg (rtx first)
5524{
5525  enum attr_cirrus attr;
5526  rtx body = PATTERN (first);
5527  rtx t;
5528  int nops;
5529
  /* Any branch must be followed by 2 non-Cirrus instructions.  */
5531  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5532    {
5533      nops = 0;
5534      t = next_nonnote_insn (first);
5535
5536      if (arm_cirrus_insn_p (t))
5537	++ nops;
5538
5539      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5540	++ nops;
5541
5542      while (nops --)
5543	emit_insn_after (gen_nop (), first);
5544
5545      return;
5546    }
5547
5548  /* (float (blah)) is in parallel with a clobber.  */
5549  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5550    body = XVECEXP (body, 0, 0);
5551
5552  if (GET_CODE (body) == SET)
5553    {
5554      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5555
5556      /* cfldrd, cfldr64, cfstrd, cfstr64 must
	 be followed by a non-Cirrus insn.  */
5558      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5559	{
5560	  if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5561	    emit_insn_after (gen_nop (), first);
5562
5563	  return;
5564	}
5565      else if (arm_memory_load_p (first))
5566	{
5567	  unsigned int arm_regno;
5568
5569	  /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
5570	     ldr/cfmv64hr combination where the Rd field is the same
	     in both instructions must be split with a non-Cirrus
5572	     insn.  Example:
5573
5574	     ldr r0, blah
5575	     nop
5576	     cfmvsr mvf0, r0.  */
5577
5578	  /* Get Arm register number for ldr insn.  */
5579	  if (GET_CODE (lhs) == REG)
5580	    arm_regno = REGNO (lhs);
5581	  else
5582	    {
5583	      gcc_assert (GET_CODE (rhs) == REG);
5584	      arm_regno = REGNO (rhs);
5585	    }
5586
5587	  /* Next insn.  */
5588	  first = next_nonnote_insn (first);
5589
5590	  if (! arm_cirrus_insn_p (first))
5591	    return;
5592
5593	  body = PATTERN (first);
5594
5595          /* (float (blah)) is in parallel with a clobber.  */
5596          if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5597	    body = XVECEXP (body, 0, 0);
5598
5599	  if (GET_CODE (body) == FLOAT)
5600	    body = XEXP (body, 0);
5601
5602	  if (get_attr_cirrus (first) == CIRRUS_MOVE
5603	      && GET_CODE (XEXP (body, 1)) == REG
5604	      && arm_regno == REGNO (XEXP (body, 1)))
5605	    emit_insn_after (gen_nop (), first);
5606
5607	  return;
5608	}
5609    }
5610
5611  /* get_attr cannot accept USE or CLOBBER.  */
5612  if (!first
5613      || GET_CODE (first) != INSN
5614      || GET_CODE (PATTERN (first)) == USE
5615      || GET_CODE (PATTERN (first)) == CLOBBER)
5616    return;
5617
5618  attr = get_attr_cirrus (first);
5619
5620  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5621     must be followed by a non-coprocessor instruction.  */
5622  if (attr == CIRRUS_COMPARE)
5623    {
5624      nops = 0;
5625
5626      t = next_nonnote_insn (first);
5627
5628      if (arm_cirrus_insn_p (t))
5629	++ nops;
5630
5631      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5632	++ nops;
5633
5634      while (nops --)
5635	emit_insn_after (gen_nop (), first);
5636
5637      return;
5638    }
5639}
5640
5641/* Return TRUE if X references a SYMBOL_REF.  */
5642int
5643symbol_mentioned_p (rtx x)
5644{
5645  const char * fmt;
5646  int i;
5647
5648  if (GET_CODE (x) == SYMBOL_REF)
5649    return 1;
5650
5651  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
5652     are constant offsets, not symbols.  */
5653  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5654    return 0;
5655
5656  fmt = GET_RTX_FORMAT (GET_CODE (x));
5657
5658  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5659    {
5660      if (fmt[i] == 'E')
5661	{
5662	  int j;
5663
5664	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5665	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
5666	      return 1;
5667	}
5668      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5669	return 1;
5670    }
5671
5672  return 0;
5673}
5674
5675/* Return TRUE if X references a LABEL_REF.  */
5676int
5677label_mentioned_p (rtx x)
5678{
5679  const char * fmt;
5680  int i;
5681
5682  if (GET_CODE (x) == LABEL_REF)
5683    return 1;
5684
5685  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
5686     instruction, but they are constant offsets, not symbols.  */
5687  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5688    return 0;
5689
5690  fmt = GET_RTX_FORMAT (GET_CODE (x));
5691  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5692    {
5693      if (fmt[i] == 'E')
5694	{
5695	  int j;
5696
5697	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5698	    if (label_mentioned_p (XVECEXP (x, i, j)))
5699	      return 1;
5700	}
5701      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5702	return 1;
5703    }
5704
5705  return 0;
5706}
5707
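/* Return nonzero if X is an UNSPEC_TLS entry, possibly wrapped in a
   CONST.  */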
5708int
5709tls_mentioned_p (rtx x)
5710{
5711  switch (GET_CODE (x))
5712    {
5713    case CONST:
5714      return tls_mentioned_p (XEXP (x, 0));
5715
5716    case UNSPEC:
5717      if (XINT (x, 1) == UNSPEC_TLS)
5718	return 1;
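      /* Fall through.  */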
5719
5720    default:
5721      return 0;
5722    }
5723}
5724
5725/* Must not copy a SET whose source operand is PC-relative.  */
5726
5727static bool
5728arm_cannot_copy_insn_p (rtx insn)
5729{
5730  rtx pat = PATTERN (insn);
5731
5732  if (GET_CODE (pat) == PARALLEL
5733      && GET_CODE (XVECEXP (pat, 0, 0)) == SET)
5734    {
5735      rtx rhs = SET_SRC (XVECEXP (pat, 0, 0));
5736
5737      if (GET_CODE (rhs) == UNSPEC
5738	  && XINT (rhs, 1) == UNSPEC_PIC_BASE)
5739	return TRUE;
5740
5741      if (GET_CODE (rhs) == MEM
5742	  && GET_CODE (XEXP (rhs, 0)) == UNSPEC
5743	  && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
5744	return TRUE;
5745    }
5746
5747  return FALSE;
5748}
5749
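/* Return the comparison code which, when true, selects the first operand
   of the min/max operation X: e.g. for (smax a b) this is GE, since the
   result is A when A >= B (illustrative example).  */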
5750enum rtx_code
5751minmax_code (rtx x)
5752{
5753  enum rtx_code code = GET_CODE (x);
5754
5755  switch (code)
5756    {
5757    case SMAX:
5758      return GE;
5759    case SMIN:
5760      return LE;
5761    case UMIN:
5762      return LEU;
5763    case UMAX:
5764      return GEU;
5765    default:
5766      gcc_unreachable ();
5767    }
5768}
5769
5770/* Return 1 if memory locations are adjacent.  */
5771int
5772adjacent_mem_locations (rtx a, rtx b)
5773{
5774  /* We don't guarantee to preserve the order of these memory refs.  */
5775  if (volatile_refs_p (a) || volatile_refs_p (b))
5776    return 0;
5777
5778  if ((GET_CODE (XEXP (a, 0)) == REG
5779       || (GET_CODE (XEXP (a, 0)) == PLUS
5780	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5781      && (GET_CODE (XEXP (b, 0)) == REG
5782	  || (GET_CODE (XEXP (b, 0)) == PLUS
5783	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5784    {
5785      HOST_WIDE_INT val0 = 0, val1 = 0;
5786      rtx reg0, reg1;
5787      int val_diff;
5788
5789      if (GET_CODE (XEXP (a, 0)) == PLUS)
5790        {
5791	  reg0 = XEXP (XEXP (a, 0), 0);
5792	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5793        }
5794      else
5795	reg0 = XEXP (a, 0);
5796
5797      if (GET_CODE (XEXP (b, 0)) == PLUS)
5798        {
5799	  reg1 = XEXP (XEXP (b, 0), 0);
5800	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5801        }
5802      else
5803	reg1 = XEXP (b, 0);
5804
5805      /* Don't accept any offset that will require multiple
5806	 instructions to handle, since this would cause the
5807	 arith_adjacentmem pattern to output an overlong sequence.  */
5808      if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
5809	return 0;
5810
5811      /* Don't allow an eliminable register: register elimination can make
5812	 the offset too large.  */
5813      if (arm_eliminable_register (reg0))
5814	return 0;
5815
5816      val_diff = val1 - val0;
5817
5818      if (arm_ld_sched)
5819	{
5820	  /* If the target has load delay slots, then there's no benefit
5821	     to using an ldm instruction unless the offset is zero and
5822	     we are optimizing for size.  */
5823	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5824		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5825		  && (val_diff == 4 || val_diff == -4));
5826	}
5827
5828      return ((REGNO (reg0) == REGNO (reg1))
5829	      && (val_diff == 4 || val_diff == -4));
5830    }
5831
5832  return 0;
5833}
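
/* For example (illustrative): [r4, #4] and [r4, #8] are adjacent (same
   base register, offsets differing by 4), whereas [r4] and [r4, #8], or
   [r4, #4] and [r5, #8], are not.  */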
5834
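/* Analyze NOPS register loads in OPERANDS to see whether a load-multiple
   is possible.  On success, fill in REGS, *BASE and *LOAD_OFFSET and
   return a code for the addressing variant: 1 = ldmia, 2 = ldmib,
   3 = ldmda, 4 = ldmdb, 5 = add then ldmia; return 0 if a load-multiple
   should not be used.  (Summary inferred from the code below.)  */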
5835int
5836load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5837			HOST_WIDE_INT *load_offset)
5838{
5839  int unsorted_regs[4];
5840  HOST_WIDE_INT unsorted_offsets[4];
5841  int order[4];
5842  int base_reg = -1;
5843  int i;
5844
5845  /* Can only handle 2, 3, or 4 insns at present,
5846     though could be easily extended if required.  */
5847  gcc_assert (nops >= 2 && nops <= 4);
5848
5849  /* Loop over the operands and check that the memory references are
5850     suitable (i.e. immediate offsets from the same base register).  At
5851     the same time, extract the target register, and the memory
5852     offsets.  */
5853  for (i = 0; i < nops; i++)
5854    {
5855      rtx reg;
5856      rtx offset;
5857
5858      /* Convert a subreg of a mem into the mem itself.  */
5859      if (GET_CODE (operands[nops + i]) == SUBREG)
5860	operands[nops + i] = alter_subreg (operands + (nops + i));
5861
5862      gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5863
5864      /* Don't reorder volatile memory references; it doesn't seem worth
5865	 looking for the case where the order is ok anyway.  */
5866      if (MEM_VOLATILE_P (operands[nops + i]))
5867	return 0;
5868
5869      offset = const0_rtx;
5870
5871      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5872	   || (GET_CODE (reg) == SUBREG
5873	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5874	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5875	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5876		   == REG)
5877		  || (GET_CODE (reg) == SUBREG
5878		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5879	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5880		  == CONST_INT)))
5881	{
5882	  if (i == 0)
5883	    {
5884	      base_reg = REGNO (reg);
5885	      unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5886				  ? REGNO (operands[i])
5887				  : REGNO (SUBREG_REG (operands[i])));
5888	      order[0] = 0;
5889	    }
5890	  else
5891	    {
5892	      if (base_reg != (int) REGNO (reg))
5893		/* Not addressed from the same base register.  */
5894		return 0;
5895
5896	      unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5897				  ? REGNO (operands[i])
5898				  : REGNO (SUBREG_REG (operands[i])));
5899	      if (unsorted_regs[i] < unsorted_regs[order[0]])
5900		order[0] = i;
5901	    }
5902
5903	  /* If it isn't an integer register, or if it overwrites the
5904	     base register but isn't the last insn in the list, then
5905	     we can't do this.  */
5906	  if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5907	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
5908	    return 0;
5909
5910	  unsorted_offsets[i] = INTVAL (offset);
5911	}
5912      else
5913	/* Not a suitable memory address.  */
5914	return 0;
5915    }
5916
5917  /* All the useful information has now been extracted from the
5918     operands into unsorted_regs and unsorted_offsets; additionally,
5919     order[0] has been set to the lowest numbered register in the
5920     list.  Sort the registers into order, and check that the memory
5921     offsets are ascending and adjacent.  */
5922
5923  for (i = 1; i < nops; i++)
5924    {
5925      int j;
5926
5927      order[i] = order[i - 1];
5928      for (j = 0; j < nops; j++)
5929	if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5930	    && (order[i] == order[i - 1]
5931		|| unsorted_regs[j] < unsorted_regs[order[i]]))
5932	  order[i] = j;
5933
      /* Have we found a suitable register?  If not, one must be used more
5935	 than once.  */
5936      if (order[i] == order[i - 1])
5937	return 0;
5938
      /* Are the memory offsets adjacent and ascending?  */
5940      if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5941	return 0;
5942    }
5943
5944  if (base)
5945    {
5946      *base = base_reg;
5947
5948      for (i = 0; i < nops; i++)
5949	regs[i] = unsorted_regs[order[i]];
5950
5951      *load_offset = unsorted_offsets[order[0]];
5952    }
5953
5954  if (unsorted_offsets[order[0]] == 0)
5955    return 1; /* ldmia */
5956
5957  if (unsorted_offsets[order[0]] == 4)
5958    return 2; /* ldmib */
5959
5960  if (unsorted_offsets[order[nops - 1]] == 0)
5961    return 3; /* ldmda */
5962
5963  if (unsorted_offsets[order[nops - 1]] == -4)
5964    return 4; /* ldmdb */
5965
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset doesn't suit one of the ldm addressing modes directly
     (so that an extra add would be needed).  The reason 2 ldrs are faster
5968     is because these ARMs are able to do more than one cache access
5969     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
5970     whilst the ARM8 has a double bandwidth cache.  This means that
5971     these cores can do both an instruction fetch and a data fetch in
5972     a single cycle, so the trick of calculating the address into a
5973     scratch register (one of the result regs) and then doing a load
5974     multiple actually becomes slower (and no smaller in code size).
5975     That is the transformation
5976
5977 	ldr	rd1, [rbase + offset]
5978 	ldr	rd2, [rbase + offset + 4]
5979
5980     to
5981
5982 	add	rd1, rbase, offset
5983 	ldmia	rd1, {rd1, rd2}
5984
5985     produces worse code -- '3 cycles + any stalls on rd2' instead of
5986     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
5987     access per cycle, the first sequence could never complete in less
5988     than 6 cycles, whereas the ldm sequence would only take 5 and
5989     would make better use of sequential accesses if not hitting the
5990     cache.
5991
5992     We cheat here and test 'arm_ld_sched' which we currently know to
5993     only be true for the ARM8, ARM9 and StrongARM.  If this ever
5994     changes, then the test below needs to be reworked.  */
5995  if (nops == 2 && arm_ld_sched)
5996    return 0;
5997
5998  /* Can't do it without setting up the offset, only do this if it takes
5999     no more than one insn.  */
6000  return (const_ok_for_arm (unsorted_offsets[order[0]])
6001	  || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
6002}
6003
6004const char *
6005emit_ldm_seq (rtx *operands, int nops)
6006{
6007  int regs[4];
6008  int base_reg;
6009  HOST_WIDE_INT offset;
6010  char buf[100];
6011  int i;
6012
6013  switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
6014    {
6015    case 1:
6016      strcpy (buf, "ldm%?ia\t");
6017      break;
6018
6019    case 2:
6020      strcpy (buf, "ldm%?ib\t");
6021      break;
6022
6023    case 3:
6024      strcpy (buf, "ldm%?da\t");
6025      break;
6026
6027    case 4:
6028      strcpy (buf, "ldm%?db\t");
6029      break;
6030
6031    case 5:
6032      if (offset >= 0)
6033	sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
6034		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
6035		 (long) offset);
6036      else
6037	sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
6038		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
6039		 (long) -offset);
6040      output_asm_insn (buf, operands);
6041      base_reg = regs[0];
6042      strcpy (buf, "ldm%?ia\t");
6043      break;
6044
6045    default:
6046      gcc_unreachable ();
6047    }
6048
6049  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
6050	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
6051
6052  for (i = 1; i < nops; i++)
6053    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
6054	     reg_names[regs[i]]);
6055
6056  strcat (buf, "}\t%@ phole ldm");
6057
6058  output_asm_insn (buf, operands);
6059  return "";
6060}
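
/* For example (illustrative), a two-register sequence loading r1 and r2
   from [r0] and [r0, #4] emits:

	ldmia	r0, {r1, r2}	@ phole ldm  */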
6061
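/* Analyze NOPS register stores in OPERANDS, as load_multiple_sequence
   does for loads; return 1 = stmia, 2 = stmib, 3 = stmda, 4 = stmdb,
   or 0 if no store-multiple is possible.  (Summary inferred from the
   code below.)  */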
6062int
6063store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6064			 HOST_WIDE_INT * load_offset)
6065{
6066  int unsorted_regs[4];
6067  HOST_WIDE_INT unsorted_offsets[4];
6068  int order[4];
6069  int base_reg = -1;
6070  int i;
6071
6072  /* Can only handle 2, 3, or 4 insns at present, though could be easily
6073     extended if required.  */
6074  gcc_assert (nops >= 2 && nops <= 4);
6075
6076  /* Loop over the operands and check that the memory references are
6077     suitable (i.e. immediate offsets from the same base register).  At
6078     the same time, extract the target register, and the memory
6079     offsets.  */
6080  for (i = 0; i < nops; i++)
6081    {
6082      rtx reg;
6083      rtx offset;
6084
6085      /* Convert a subreg of a mem into the mem itself.  */
6086      if (GET_CODE (operands[nops + i]) == SUBREG)
6087	operands[nops + i] = alter_subreg (operands + (nops + i));
6088
6089      gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6090
6091      /* Don't reorder volatile memory references; it doesn't seem worth
6092	 looking for the case where the order is ok anyway.  */
6093      if (MEM_VOLATILE_P (operands[nops + i]))
6094	return 0;
6095
6096      offset = const0_rtx;
6097
6098      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6099	   || (GET_CODE (reg) == SUBREG
6100	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6101	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6102	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6103		   == REG)
6104		  || (GET_CODE (reg) == SUBREG
6105		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6106	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6107		  == CONST_INT)))
6108	{
6109	  if (i == 0)
6110	    {
6111	      base_reg = REGNO (reg);
6112	      unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6113				  ? REGNO (operands[i])
6114				  : REGNO (SUBREG_REG (operands[i])));
6115	      order[0] = 0;
6116	    }
6117	  else
6118	    {
6119	      if (base_reg != (int) REGNO (reg))
6120		/* Not addressed from the same base register.  */
6121		return 0;
6122
6123	      unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6124				  ? REGNO (operands[i])
6125				  : REGNO (SUBREG_REG (operands[i])));
6126	      if (unsorted_regs[i] < unsorted_regs[order[0]])
6127		order[0] = i;
6128	    }
6129
6130	  /* If it isn't an integer register, then we can't do this.  */
6131	  if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
6132	    return 0;
6133
6134	  unsorted_offsets[i] = INTVAL (offset);
6135	}
6136      else
6137	/* Not a suitable memory address.  */
6138	return 0;
6139    }
6140
6141  /* All the useful information has now been extracted from the
6142     operands into unsorted_regs and unsorted_offsets; additionally,
6143     order[0] has been set to the lowest numbered register in the
6144     list.  Sort the registers into order, and check that the memory
6145     offsets are ascending and adjacent.  */
6146
6147  for (i = 1; i < nops; i++)
6148    {
6149      int j;
6150
6151      order[i] = order[i - 1];
6152      for (j = 0; j < nops; j++)
6153	if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6154	    && (order[i] == order[i - 1]
6155		|| unsorted_regs[j] < unsorted_regs[order[i]]))
6156	  order[i] = j;
6157
      /* Have we found a suitable register?  If not, one must be used more
6159	 than once.  */
6160      if (order[i] == order[i - 1])
6161	return 0;
6162
      /* Are the memory offsets adjacent and ascending?  */
6164      if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6165	return 0;
6166    }
6167
6168  if (base)
6169    {
6170      *base = base_reg;
6171
6172      for (i = 0; i < nops; i++)
6173	regs[i] = unsorted_regs[order[i]];
6174
6175      *load_offset = unsorted_offsets[order[0]];
6176    }
6177
6178  if (unsorted_offsets[order[0]] == 0)
6179    return 1; /* stmia */
6180
6181  if (unsorted_offsets[order[0]] == 4)
6182    return 2; /* stmib */
6183
6184  if (unsorted_offsets[order[nops - 1]] == 0)
6185    return 3; /* stmda */
6186
6187  if (unsorted_offsets[order[nops - 1]] == -4)
6188    return 4; /* stmdb */
6189
6190  return 0;
6191}
6192
6193const char *
6194emit_stm_seq (rtx *operands, int nops)
6195{
6196  int regs[4];
6197  int base_reg;
6198  HOST_WIDE_INT offset;
6199  char buf[100];
6200  int i;
6201
6202  switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
6203    {
6204    case 1:
6205      strcpy (buf, "stm%?ia\t");
6206      break;
6207
6208    case 2:
6209      strcpy (buf, "stm%?ib\t");
6210      break;
6211
6212    case 3:
6213      strcpy (buf, "stm%?da\t");
6214      break;
6215
6216    case 4:
6217      strcpy (buf, "stm%?db\t");
6218      break;
6219
6220    default:
6221      gcc_unreachable ();
6222    }
6223
6224  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
6225	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
6226
6227  for (i = 1; i < nops; i++)
6228    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
6229	     reg_names[regs[i]]);
6230
6231  strcat (buf, "}\t%@ phole stm");
6232
6233  output_asm_insn (buf, operands);
6234  return "";
6235}
6236
6237/* Routines for use in generating RTL.  */
6238
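/* Generate RTL to load COUNT consecutive SImode registers, starting at
   BASE_REGNO, from memory at FROM, tracking the running offset through
   *OFFSETP into BASEMEM.  UP selects the direction and WRITE_BACK updates
   the base register.  Return either an emitted insn sequence (the small
   XScale case below) or a PARALLEL for a load-multiple pattern.
   (Summary inferred from the code below.)  */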
6239rtx
6240arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
6241		       int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
6242{
6243  HOST_WIDE_INT offset = *offsetp;
6244  int i = 0, j;
6245  rtx result;
6246  int sign = up ? 1 : -1;
6247  rtx mem, addr;
6248
6249  /* XScale has load-store double instructions, but they have stricter
6250     alignment requirements than load-store multiple, so we cannot
6251     use them.
6252
6253     For XScale ldm requires 2 + NREGS cycles to complete and blocks
6254     the pipeline until completion.
6255
6256	NREGS		CYCLES
6257	  1		  3
6258	  2		  4
6259	  3		  5
6260	  4		  6
6261
6262     An ldr instruction takes 1-3 cycles, but does not block the
6263     pipeline.
6264
6265	NREGS		CYCLES
6266	  1		 1-3
6267	  2		 2-6
6268	  3		 3-9
6269	  4		 4-12
6270
6271     Best case ldr will always win.  However, the more ldr instructions
6272     we issue, the less likely we are to be able to schedule them well.
6273     Using ldr instructions also increases code size.
6274
6275     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
6276     for counts of 3 or 4 regs.  */
6277  if (arm_tune_xscale && count <= 2 && ! optimize_size)
6278    {
6279      rtx seq;
6280
6281      start_sequence ();
6282
6283      for (i = 0; i < count; i++)
6284	{
6285	  addr = plus_constant (from, i * 4 * sign);
6286	  mem = adjust_automodify_address (basemem, SImode, addr, offset);
6287	  emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
6288	  offset += 4 * sign;
6289	}
6290
6291      if (write_back)
6292	{
6293	  emit_move_insn (from, plus_constant (from, count * 4 * sign));
6294	  *offsetp = offset;
6295	}
6296
6297      seq = get_insns ();
6298      end_sequence ();
6299
6300      return seq;
6301    }
6302
6303  result = gen_rtx_PARALLEL (VOIDmode,
6304			     rtvec_alloc (count + (write_back ? 1 : 0)));
6305  if (write_back)
6306    {
6307      XVECEXP (result, 0, 0)
6308	= gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
6309      i = 1;
6310      count++;
6311    }
6312
6313  for (j = 0; i < count; i++, j++)
6314    {
6315      addr = plus_constant (from, j * 4 * sign);
6316      mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
6317      XVECEXP (result, 0, i)
6318	= gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
6319      offset += 4 * sign;
6320    }
6321
6322  if (write_back)
6323    *offsetp = offset;
6324
6325  return result;
6326}
6327
6328rtx
6329arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
6330			int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
6331{
6332  HOST_WIDE_INT offset = *offsetp;
6333  int i = 0, j;
6334  rtx result;
6335  int sign = up ? 1 : -1;
6336  rtx mem, addr;
6337
6338  /* See arm_gen_load_multiple for discussion of
6339     the pros/cons of ldm/stm usage for XScale.  */
6340  if (arm_tune_xscale && count <= 2 && ! optimize_size)
6341    {
6342      rtx seq;
6343
6344      start_sequence ();
6345
6346      for (i = 0; i < count; i++)
6347	{
6348	  addr = plus_constant (to, i * 4 * sign);
6349	  mem = adjust_automodify_address (basemem, SImode, addr, offset);
6350	  emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
6351	  offset += 4 * sign;
6352	}
6353
6354      if (write_back)
6355	{
6356	  emit_move_insn (to, plus_constant (to, count * 4 * sign));
6357	  *offsetp = offset;
6358	}
6359
6360      seq = get_insns ();
6361      end_sequence ();
6362
6363      return seq;
6364    }
6365
6366  result = gen_rtx_PARALLEL (VOIDmode,
6367			     rtvec_alloc (count + (write_back ? 1 : 0)));
6368  if (write_back)
6369    {
6370      XVECEXP (result, 0, 0)
6371	= gen_rtx_SET (VOIDmode, to,
6372		       plus_constant (to, count * 4 * sign));
6373      i = 1;
6374      count++;
6375    }
6376
6377  for (j = 0; i < count; i++, j++)
6378    {
6379      addr = plus_constant (to, j * 4 * sign);
6380      mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
6381      XVECEXP (result, 0, i)
6382	= gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
6383      offset += 4 * sign;
6384    }
6385
6386  if (write_back)
6387    *offsetp = offset;
6388
6389  return result;
6390}
6391
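/* Expand a block copy: operands[0]/operands[1] are the destination and
   source (MEMs), operands[2] the byte count and operands[3] the
   alignment.  Only word-aligned copies of at most 64 bytes are handled;
   return 1 if RTL was emitted, 0 to let the generic code take over.
   (Summary inferred from the checks below.)  */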
6392int
6393arm_gen_movmemqi (rtx *operands)
6394{
6395  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
6396  HOST_WIDE_INT srcoffset, dstoffset;
6397  int i;
6398  rtx src, dst, srcbase, dstbase;
6399  rtx part_bytes_reg = NULL;
6400  rtx mem;
6401
6402  if (GET_CODE (operands[2]) != CONST_INT
6403      || GET_CODE (operands[3]) != CONST_INT
6404      || INTVAL (operands[2]) > 64
6405      || INTVAL (operands[3]) & 3)
6406    return 0;
6407
6408  dstbase = operands[0];
6409  srcbase = operands[1];
6410
6411  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
6412  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
6413
6414  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
6415  out_words_to_go = INTVAL (operands[2]) / 4;
6416  last_bytes = INTVAL (operands[2]) & 3;
6417  dstoffset = srcoffset = 0;
6418
6419  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
6420    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
6421
  for (i = 0; in_words_to_go >= 2; i += 4)
6423    {
6424      if (in_words_to_go > 4)
6425	emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
6426					  srcbase, &srcoffset));
6427      else
6428	emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
6429					  FALSE, srcbase, &srcoffset));
6430
6431      if (out_words_to_go)
6432	{
6433	  if (out_words_to_go > 4)
6434	    emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
6435					       dstbase, &dstoffset));
6436	  else if (out_words_to_go != 1)
6437	    emit_insn (arm_gen_store_multiple (0, out_words_to_go,
6438					       dst, TRUE,
6439					       (last_bytes == 0
6440						? FALSE : TRUE),
6441					       dstbase, &dstoffset));
6442	  else
6443	    {
6444	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
6445	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
6446	      if (last_bytes != 0)
6447		{
6448		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
6449		  dstoffset += 4;
6450		}
6451	    }
6452	}
6453
6454      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
6455      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
6456    }
6457
6458  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
6459  if (out_words_to_go)
6460    {
6461      rtx sreg;
6462
6463      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6464      sreg = copy_to_reg (mem);
6465
6466      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
6467      emit_move_insn (mem, sreg);
6468      in_words_to_go--;
6469
      gcc_assert (!in_words_to_go);	/* Sanity check.  */
6471    }
6472
6473  if (in_words_to_go)
6474    {
6475      gcc_assert (in_words_to_go > 0);
6476
6477      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6478      part_bytes_reg = copy_to_mode_reg (SImode, mem);
6479    }
6480
6481  gcc_assert (!last_bytes || part_bytes_reg);
6482
6483  if (BYTES_BIG_ENDIAN && last_bytes)
6484    {
6485      rtx tmp = gen_reg_rtx (SImode);
6486
6487      /* The bytes we want are in the top end of the word.  */
6488      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
6489			      GEN_INT (8 * (4 - last_bytes))));
6490      part_bytes_reg = tmp;
6491
6492      while (last_bytes)
6493	{
6494	  mem = adjust_automodify_address (dstbase, QImode,
6495					   plus_constant (dst, last_bytes - 1),
6496					   dstoffset + last_bytes - 1);
6497	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6498
6499	  if (--last_bytes)
6500	    {
6501	      tmp = gen_reg_rtx (SImode);
6502	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
6503	      part_bytes_reg = tmp;
6504	    }
6505	}
6506
6507    }
6508  else
6509    {
6510      if (last_bytes > 1)
6511	{
6512	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
6513	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
6514	  last_bytes -= 2;
6515	  if (last_bytes)
6516	    {
6517	      rtx tmp = gen_reg_rtx (SImode);
6518	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
6519	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
6520	      part_bytes_reg = tmp;
6521	      dstoffset += 2;
6522	    }
6523	}
6524
6525      if (last_bytes)
6526	{
6527	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6528	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6529	}
6530    }
6531
6532  return 1;
6533}
6534
6535/* Select a dominance comparison mode if possible for a test of the general
6536   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
6537   COND_OR == DOM_CC_X_AND_Y => (X && Y)
6538   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6539   COND_OR == DOM_CC_X_OR_Y => (X || Y)
6540   In all cases OP will be either EQ or NE, but we don't need to know which
6541   here.  If we are unable to support a dominance comparison we return
6542   CC mode.  This will then fail to match for the RTL expressions that
6543   generate this call.  */
6544enum machine_mode
6545arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6546{
6547  enum rtx_code cond1, cond2;
6548  int swapped = 0;
6549
6550  /* Currently we will probably get the wrong result if the individual
6551     comparisons are not simple.  This also ensures that it is safe to
6552     reverse a comparison if necessary.  */
6553  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6554       != CCmode)
6555      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6556	  != CCmode))
6557    return CCmode;
6558
6559  /* The if_then_else variant of this tests the second condition if the
6560     first passes, but is true if the first fails.  Reverse the first
6561     condition to get a true "inclusive-or" expression.  */
6562  if (cond_or == DOM_CC_NX_OR_Y)
6563    cond1 = reverse_condition (cond1);
6564
6565  /* If the comparisons are not equal, and one doesn't dominate the other,
6566     then we can't do this.  */
6567  if (cond1 != cond2
6568      && !comparison_dominates_p (cond1, cond2)
6569      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6570    return CCmode;
6571
6572  if (swapped)
6573    {
6574      enum rtx_code temp = cond1;
6575      cond1 = cond2;
6576      cond2 = temp;
6577    }
6578
6579  switch (cond1)
6580    {
6581    case EQ:
6582      if (cond_or == DOM_CC_X_AND_Y)
6583	return CC_DEQmode;
6584
6585      switch (cond2)
6586	{
6587	case EQ: return CC_DEQmode;
6588	case LE: return CC_DLEmode;
6589	case LEU: return CC_DLEUmode;
6590	case GE: return CC_DGEmode;
6591	case GEU: return CC_DGEUmode;
6592	default: gcc_unreachable ();
6593	}
6594
6595    case LT:
6596      if (cond_or == DOM_CC_X_AND_Y)
6597	return CC_DLTmode;
6598
6599      switch (cond2)
6600	{
	case LT:
	  return CC_DLTmode;
6603	case LE:
6604	  return CC_DLEmode;
6605	case NE:
6606	  return CC_DNEmode;
6607	default:
6608	  gcc_unreachable ();
6609	}
6610
6611    case GT:
6612      if (cond_or == DOM_CC_X_AND_Y)
6613	return CC_DGTmode;
6614
6615      switch (cond2)
6616	{
6617	case GT:
6618	  return CC_DGTmode;
6619	case GE:
6620	  return CC_DGEmode;
6621	case NE:
6622	  return CC_DNEmode;
6623	default:
6624	  gcc_unreachable ();
6625	}
6626
6627    case LTU:
6628      if (cond_or == DOM_CC_X_AND_Y)
6629	return CC_DLTUmode;
6630
6631      switch (cond2)
6632	{
6633	case LTU:
6634	  return CC_DLTUmode;
6635	case LEU:
6636	  return CC_DLEUmode;
6637	case NE:
6638	  return CC_DNEmode;
6639	default:
6640	  gcc_unreachable ();
6641	}
6642
6643    case GTU:
6644      if (cond_or == DOM_CC_X_AND_Y)
6645	return CC_DGTUmode;
6646
6647      switch (cond2)
6648	{
6649	case GTU:
6650	  return CC_DGTUmode;
6651	case GEU:
6652	  return CC_DGEUmode;
6653	case NE:
6654	  return CC_DNEmode;
6655	default:
6656	  gcc_unreachable ();
6657	}
6658
6659    /* The remaining cases only occur when both comparisons are the
6660       same.  */
6661    case NE:
6662      gcc_assert (cond1 == cond2);
6663      return CC_DNEmode;
6664
6665    case LE:
6666      gcc_assert (cond1 == cond2);
6667      return CC_DLEmode;
6668
6669    case GE:
6670      gcc_assert (cond1 == cond2);
6671      return CC_DGEmode;
6672
6673    case LEU:
6674      gcc_assert (cond1 == cond2);
6675      return CC_DLEUmode;
6676
6677    case GEU:
6678      gcc_assert (cond1 == cond2);
6679      return CC_DGEUmode;
6680
6681    default:
6682      gcc_unreachable ();
6683    }
6684}
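
/* For example (illustrative): for X = (lt r0 r1), Y = (le r0 r2) and
   COND_OR == DOM_CC_X_OR_Y, LT dominates LE, so the function above
   returns CC_DLEmode.  */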
6685
6686enum machine_mode
6687arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6688{
6689  /* All floating point compares return CCFP if it is an equality
6690     comparison, and CCFPE otherwise.  */
6691  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6692    {
6693      switch (op)
6694	{
6695	case EQ:
6696	case NE:
6697	case UNORDERED:
6698	case ORDERED:
6699	case UNLT:
6700	case UNLE:
6701	case UNGT:
6702	case UNGE:
6703	case UNEQ:
6704	case LTGT:
6705	  return CCFPmode;
6706
6707	case LT:
6708	case LE:
6709	case GT:
6710	case GE:
6711	  if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6712	    return CCFPmode;
6713	  return CCFPEmode;
6714
6715	default:
6716	  gcc_unreachable ();
6717	}
6718    }
6719
6720  /* A compare with a shifted operand.  Because of canonicalization, the
6721     comparison will have to be swapped when we emit the assembler.  */
6722  if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6723      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6724	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6725	  || GET_CODE (x) == ROTATERT))
6726    return CC_SWPmode;
6727
6728  /* This operation is performed swapped, but since we only rely on the Z
6729     flag we don't need an additional mode.  */
6730  if (GET_MODE (y) == SImode && REG_P (y)
6731      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
6733    return CC_Zmode;
6734
6735  /* This is a special case that is used by combine to allow a
6736     comparison of a shifted byte load to be split into a zero-extend
6737     followed by a comparison of the shifted integer (only valid for
6738     equalities and unsigned inequalities).  */
6739  if (GET_MODE (x) == SImode
6740      && GET_CODE (x) == ASHIFT
6741      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6742      && GET_CODE (XEXP (x, 0)) == SUBREG
6743      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6744      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6745      && (op == EQ || op == NE
6746	  || op == GEU || op == GTU || op == LTU || op == LEU)
6747      && GET_CODE (y) == CONST_INT)
6748    return CC_Zmode;
6749
  /* A construct for a conditional compare: if the false arm contains
6751     0, then both conditions must be true, otherwise either condition
6752     must be true.  Not all conditions are possible, so CCmode is
6753     returned if it can't be done.  */
6754  if (GET_CODE (x) == IF_THEN_ELSE
6755      && (XEXP (x, 2) == const0_rtx
6756	  || XEXP (x, 2) == const1_rtx)
6757      && COMPARISON_P (XEXP (x, 0))
6758      && COMPARISON_P (XEXP (x, 1)))
6759    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6760					 INTVAL (XEXP (x, 2)));
6761
6762  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
6763  if (GET_CODE (x) == AND
6764      && COMPARISON_P (XEXP (x, 0))
6765      && COMPARISON_P (XEXP (x, 1)))
6766    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6767					 DOM_CC_X_AND_Y);
6768
6769  if (GET_CODE (x) == IOR
6770      && COMPARISON_P (XEXP (x, 0))
6771      && COMPARISON_P (XEXP (x, 1)))
6772    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6773					 DOM_CC_X_OR_Y);
6774
6775  /* An operation (on Thumb) where we want to test for a single bit.
6776     This is done by shifting that bit up into the top bit of a
6777     scratch register; we can then branch on the sign bit.  */
6778  if (TARGET_THUMB
6779      && GET_MODE (x) == SImode
6780      && (op == EQ || op == NE)
6781      && GET_CODE (x) == ZERO_EXTRACT
6782      && XEXP (x, 1) == const1_rtx)
6783    return CC_Nmode;
6784
  /* For an operation that sets the condition codes as a side-effect, the
6786     V flag is not set correctly, so we can only use comparisons where
6787     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
6788     instead.)  */
6789  if (GET_MODE (x) == SImode
6790      && y == const0_rtx
6791      && (op == EQ || op == NE || op == LT || op == GE)
6792      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6793	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
6794	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6795	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6796	  || GET_CODE (x) == LSHIFTRT
6797	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6798	  || GET_CODE (x) == ROTATERT
6799	  || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6800    return CC_NOOVmode;
6801
6802  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6803    return CC_Zmode;
6804
6805  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6806      && GET_CODE (x) == PLUS
6807      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6808    return CC_Cmode;
6809
6810  return CCmode;
6811}
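
/* For example (illustrative), a carry test such as
   (ltu (plus r0 r1) r0) -- the canonical form of an unsigned overflow
   check on an addition -- selects CC_Cmode above, since only the C flag
   is required.  */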
6812
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
6816rtx
6817arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6818{
6819  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6820  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6821
6822  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
6823
6824  return cc_reg;
6825}
6826
6827/* Generate a sequence of insns that will generate the correct return
6828   address mask depending on the physical architecture that the program
6829   is running on.  */
6830rtx
6831arm_gen_return_addr_mask (void)
6832{
6833  rtx reg = gen_reg_rtx (Pmode);
6834
6835  emit_insn (gen_return_addr_mask (reg));
6836  return reg;
6837}
6838
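/* Handle loading a half-word into a register during reload by
   synthesizing it as two byte loads plus a shift/or, using the DImode
   scratch in operands[2] where needed.  (Summary inferred from the code
   below; compare arm_reload_out_hi.)  */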
6839void
6840arm_reload_in_hi (rtx *operands)
6841{
6842  rtx ref = operands[1];
6843  rtx base, scratch;
6844  HOST_WIDE_INT offset = 0;
6845
6846  if (GET_CODE (ref) == SUBREG)
6847    {
6848      offset = SUBREG_BYTE (ref);
6849      ref = SUBREG_REG (ref);
6850    }
6851
6852  if (GET_CODE (ref) == REG)
6853    {
      /* We have a pseudo which has been spilled onto the stack; there
6855	 are two cases here: the first where there is a simple
6856	 stack-slot replacement and a second where the stack-slot is
6857	 out of range, or is used as a subreg.  */
6858      if (reg_equiv_mem[REGNO (ref)])
6859	{
6860	  ref = reg_equiv_mem[REGNO (ref)];
6861	  base = find_replacement (&XEXP (ref, 0));
6862	}
6863      else
6864	/* The slot is out of range, or was dressed up in a SUBREG.  */
6865	base = reg_equiv_address[REGNO (ref)];
6866    }
6867  else
6868    base = find_replacement (&XEXP (ref, 0));
6869
6870  /* Handle the case where the address is too complex to be offset by 1.  */
6871  if (GET_CODE (base) == MINUS
6872      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6873    {
6874      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6875
6876      emit_set_insn (base_plus, base);
6877      base = base_plus;
6878    }
6879  else if (GET_CODE (base) == PLUS)
6880    {
6881      /* The addend must be CONST_INT, or we would have dealt with it above.  */
6882      HOST_WIDE_INT hi, lo;
6883
6884      offset += INTVAL (XEXP (base, 1));
6885      base = XEXP (base, 0);
6886
6887      /* Rework the address into a legal sequence of insns.  */
6888      /* Valid range for lo is -4095 -> 4095 */
6889      lo = (offset >= 0
6890	    ? (offset & 0xfff)
6891	    : -((-offset) & 0xfff));
6892
      /* Corner case: if lo is the max offset then we would be out of range
6894	 once we have added the additional 1 below, so bump the msb into the
6895	 pre-loading insn(s).  */
6896      if (lo == 4095)
6897	lo &= 0x7ff;
6898
6899      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6900	     ^ (HOST_WIDE_INT) 0x80000000)
6901	    - (HOST_WIDE_INT) 0x80000000);
6902
6903      gcc_assert (hi + lo == offset);
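      /* For example, offset = 4095 gives lo = 2047 (after the masking
	 above) and hi = 2048; both base + hi and lo + 1 then stay in
	 range.  */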
6904
6905      if (hi != 0)
6906	{
6907	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6908
6909	  /* Get the base address; addsi3 knows how to handle constants
6910	     that require more than one insn.  */
6911	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6912	  base = base_plus;
6913	  offset = lo;
6914	}
6915    }
6916
  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]); that's why we asked for a DImode reg -- so we can
     use the half that does not overlap.  */
6920  if (REGNO (operands[2]) == REGNO (operands[0]))
6921    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6922  else
6923    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6924
6925  emit_insn (gen_zero_extendqisi2 (scratch,
6926				   gen_rtx_MEM (QImode,
6927						plus_constant (base,
6928							       offset))));
6929  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6930				   gen_rtx_MEM (QImode,
6931						plus_constant (base,
6932							       offset + 1))));
6933  if (!BYTES_BIG_ENDIAN)
6934    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
6935		   gen_rtx_IOR (SImode,
6936				gen_rtx_ASHIFT
6937				(SImode,
6938				 gen_rtx_SUBREG (SImode, operands[0], 0),
6939				 GEN_INT (8)),
6940				scratch));
6941  else
6942    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
6943		   gen_rtx_IOR (SImode,
6944				gen_rtx_ASHIFT (SImode, scratch,
6945						GEN_INT (8)),
6946				gen_rtx_SUBREG (SImode, operands[0], 0)));
6947}
6948
6949/* Handle storing a half-word to memory during reload by synthesizing as two
6950   byte stores.  Take care not to clobber the input values until after we
6951   have moved them somewhere safe.  This code assumes that if the DImode
6952   scratch in operands[2] overlaps either the input value or output address
6953   in some way, then that value must die in this insn (we absolutely need
6954   two scratch registers for some corner cases).  */
6955void
6956arm_reload_out_hi (rtx *operands)
6957{
6958  rtx ref = operands[0];
6959  rtx outval = operands[1];
6960  rtx base, scratch;
6961  HOST_WIDE_INT offset = 0;
6962
6963  if (GET_CODE (ref) == SUBREG)
6964    {
6965      offset = SUBREG_BYTE (ref);
6966      ref = SUBREG_REG (ref);
6967    }
6968
6969  if (GET_CODE (ref) == REG)
6970    {
      /* We have a pseudo which has been spilled onto the stack; there
6972	 are two cases here: the first where there is a simple
6973	 stack-slot replacement and a second where the stack-slot is
6974	 out of range, or is used as a subreg.  */
6975      if (reg_equiv_mem[REGNO (ref)])
6976	{
6977	  ref = reg_equiv_mem[REGNO (ref)];
6978	  base = find_replacement (&XEXP (ref, 0));
6979	}
6980      else
6981	/* The slot is out of range, or was dressed up in a SUBREG.  */
6982	base = reg_equiv_address[REGNO (ref)];
6983    }
6984  else
6985    base = find_replacement (&XEXP (ref, 0));
6986
6987  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6988
6989  /* Handle the case where the address is too complex to be offset by 1.  */
6990  if (GET_CODE (base) == MINUS
6991      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6992    {
6993      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6994
6995      /* Be careful not to destroy OUTVAL.  */
6996      if (reg_overlap_mentioned_p (base_plus, outval))
6997	{
6998	  /* Updating base_plus might destroy outval, see if we can
6999	     swap the scratch and base_plus.  */
7000	  if (!reg_overlap_mentioned_p (scratch, outval))
7001	    {
7002	      rtx tmp = scratch;
7003	      scratch = base_plus;
7004	      base_plus = tmp;
7005	    }
7006	  else
7007	    {
7008	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
7009
7010	      /* Be conservative and copy OUTVAL into the scratch now,
7011		 this should only be necessary if outval is a subreg
7012		 of something larger than a word.  */
7013	      /* XXX Might this clobber base?  I can't see how it can,
7014		 since scratch is known to overlap with OUTVAL, and
7015		 must be wider than a word.  */
7016	      emit_insn (gen_movhi (scratch_hi, outval));
7017	      outval = scratch_hi;
7018	    }
7019	}
7020
7021      emit_set_insn (base_plus, base);
7022      base = base_plus;
7023    }
7024  else if (GET_CODE (base) == PLUS)
7025    {
7026      /* The addend must be CONST_INT, or we would have dealt with it above.  */
7027      HOST_WIDE_INT hi, lo;
7028
7029      offset += INTVAL (XEXP (base, 1));
7030      base = XEXP (base, 0);
7031
7032      /* Rework the address into a legal sequence of insns.  */
7033      /* Valid range for lo is -4095 -> 4095 */
7034      lo = (offset >= 0
7035	    ? (offset & 0xfff)
7036	    : -((-offset) & 0xfff));
7037
      /* Corner case: if lo is the max offset then we would be out of range
7039	 once we have added the additional 1 below, so bump the msb into the
7040	 pre-loading insn(s).  */
7041      if (lo == 4095)
7042	lo &= 0x7ff;
7043
7044      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7045	     ^ (HOST_WIDE_INT) 0x80000000)
7046	    - (HOST_WIDE_INT) 0x80000000);
7047
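      /* For example, an offset of 8000 (0x1f40) splits into lo = 0xf40
	 (3904), which the byte store addressing below can reach, and
	 hi = 0x1000 (4096), which is added to the base register below;
	 the xor/subtract above simply sign-extends the low 32 bits of
	 (offset - lo).  */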
      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval; see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		{
		  rtx tmp = scratch;
		  scratch = base_plus;
		  base_plus = tmp;
		}
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now;
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

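  /* Store the two halves: on a big-endian target the least significant
     byte of OUTVAL belongs at the higher address, so it is stored
     first and the high byte is then shifted down into SCRATCH for the
     second store; the little-endian case is the mirror image.  */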
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}

/* Return true if a type must be passed in memory.  For AAPCS, small
   aggregates (padded to the size of a word) should be passed in a
   register.  */

static bool
arm_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}


/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */

bool
arm_pad_arg_upward (enum machine_mode mode, tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}


/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
   byte of the register has useful data, and return the opposite if the
   most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */

bool
arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED
      && BYTES_BIG_ENDIAN
      && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
      && int_size_in_bytes (type) <= 4)
    return true;

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}


/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long) XWINT (x, 2), (long) XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}

/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code at which this entry can be placed.
     While pushing fixes for forward references, all entries are sorted
     in order of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in the table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8 bytes.  */
  int fix_size;
};

struct minipool_fixup
{
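  /* Fixes are kept on a singly linked chain (NEXT).  INSN is the insn
     that needs fixing up and ADDRESS its offset from the start of the
     function; LOC points to the operand within the insn that must be
     patched.  MODE, FIX_SIZE and VALUE describe the constant to be
     loaded; MINIPOOL is the pool entry eventually assigned to this
     fix, and FORWARDS and BACKWARDS give the addressing range of the
     insn in each direction.  */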
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

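/* The entries of the current minipool: the head and tail of the list,
   the label that will mark the start of the pool, and the number of
   bytes of padding assumed at the head of the pool when any entry
   needs 8-byte alignment (see push_minipool_fix).  */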
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx	minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (GET_CODE (insn) == JUMP_INSN
      && JUMP_LABEL (insn) != NULL
      && ((table = next_real_insn (JUMP_LABEL (insn)))
	  == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

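/* Return the number of bytes that the jump table INSN (an ADDR_VEC or
   ADDR_DIFF_VEC) occupies in the instruction stream; this is zero
   when jump tables are emitted into a separate read-only data
   section.  */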
static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;

      return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
    }

  return 0;
}

/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
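  /* The furthest address at which the new entry could be placed while
     remaining in range of FIX, allowing for any padding that may
     precede the pool when 8-byte aligned entries are present.  */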
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head
      && (fix->address + get_attr_length (fix->insn)
	  >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size == 8
	  && mp->fix_size != 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

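/* Like move_minipool_fix_forward_ref, but for an entry referenced by
   a backwards fix: re-insert MP after MIN_MP (when MIN_MP is non-null)
   and tighten its minimum address constraint to MIN_ADDRESS, then
   recompute the offsets and minimum addresses of the whole pool.
   Returns the new position of the entry.  */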
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}

/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size == 8 && mp->fix_size != 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	  /* If we are inserting an 8-byte aligned quantity and
	     we have not already found an insertion point, then
	     make sure that all such 8-byte aligned quantities are
	     placed at the start of the pool.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && min_mp == NULL
		   && fix->fix_size == 8
		   && mp->fix_size < 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}

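/* Record BARRIER as the barrier after which the current minipool will
   be emitted, and assign each live entry its offset from the start of
   the pool.  */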
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}

/* Output the literal table.  */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size == 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (long) minipool_barrier->address,
	     align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (long) mp->min_address,
		       (long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (mp->fix_size)
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;

#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;

#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;

#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;

#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}

/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}

/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}

/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

#ifdef AOF_ASSEMBLER
  /* PIC symbol references need to be converted into offsets into the
     based area.  */
  /* XXX This shouldn't be done here.  */
  if (flag_pic && GET_CODE (value) == SYMBOL_REF)
    value = aof_pic_entry (value);
#endif /* AOF_ASSEMBLER */

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long) fix->backwards, (long) fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}

/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}

/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  bool result = false;
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return false;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	      result = true;
	    }
	  else if (GET_CODE (op) == MEM
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}

	      result = true;
	    }
	}
    }

  return result;
}

/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (GET_CODE (insn) == NOTE);
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (TARGET_CIRRUS_FIX_INVALID_INSNS
          && (arm_cirrus_insn_p (insn)
	      || GET_CODE (insn) == JUMP_INSN
	      || arm_memory_load_p (insn)))
	cirrus_reorg (insn);

      if (GET_CODE (insn) == BARRIER)
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if ((table = is_jump_table (insn)) != NULL)
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && GET_CODE (fix->insn) == BARRIER)
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (GET_CODE (ftmp->insn) == BARRIER)
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
        {
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (GET_CODE (ftmp->insn) != BARRIER
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (GET_CODE (this_fix->insn) != BARRIER)
	  {
	    rtx addr
	      = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}

/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string representation
   of the number.  In this way we can ensure that valid double constants
   are generated even when cross-compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}

/* Output the operands of a LDM/STM instruction to STREAM.
   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer.
   INSTR is the possibly suffixed load or store instruction.  */

static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
		 unsigned long mask)
{
  unsigned i;
  bool not_first = FALSE;

  fputc ('\t', stream);
  asm_fprintf (stream, instr, reg);
  fputs (", {", stream);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      {
	if (not_first)
	  fprintf (stream, ", ");

	asm_fprintf (stream, "%r", i);
	not_first = TRUE;
      }

  fprintf (stream, "}\n");
}


/* Output a FLDMX instruction to STREAM.
   BASE is the register containing the address.
   REG and COUNT specify the register range.
   Extra registers may be added to avoid hardware bugs.  */

static void
arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
{
  int i;

  /* Work around the ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)
    {
      if (reg == 15)
	reg--;
      count++;
    }

  fputc ('\t', stream);
  asm_fprintf (stream, "fldmfdx\t%r!, {", base);

  for (i = reg; i < reg + count; i++)
    {
      if (i > reg)
	fputs (", ", stream);
      asm_fprintf (stream, "d%d", i);
    }
  fputs ("}\n", stream);
}


/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmx (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdx\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}


/* Emit RTL to save a block of VFP register pairs to the stack.  Returns
   the number of bytes pushed.  */

static int
vfp_emit_fstmx (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when
     exactly two register pairs are stored by a store multiple insn.
     We avoid this by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* ??? The frame layout is implementation defined.  We describe
     standard format 1 (equivalent to a FSTMD insn and unused pad word).
     We really need some way of representing the whole block so that the
     unwinder can figure it out at runtime.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem (BLKmode,
				  gen_rtx_PRE_DEC (BLKmode,
						   stack_pointer_rtx)),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -(count * 8 + 4)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
				       REG_NOTES (par));
  RTX_FRAME_RELATED_P (par) = 1;

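  /* Each register pair occupies 8 bytes, plus the 4-byte pad word of
     the FSTMX format 1 block described above.  */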
  return count * 8 + 4;
}


/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in the
     subroutine anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

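  /* Reading the PC yields the address two instructions ahead, so the
     "mov lr, pc" emitted here leaves LR pointing at the insn that
     follows the branch below: exactly the required return address.  */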
  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}

/* Output a 'call' insn whose target is a reference in memory.  */
const char *
output_call_mem (rtx *operands)
{
  if (TARGET_INTERWORK && !arm_arch5)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      if (arm_arch5)
	output_asm_insn ("blx%?\t%|ip", operands);
      else
	{
	  output_asm_insn ("mov%?\t%|lr, %|pc", operands);
	  if (arm_arch4t)
	    output_asm_insn ("bx%?\t%|ip", operands);
	  else
	    output_asm_insn ("mov%?\t%|pc, %|ip", operands);
	}
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}


/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_long_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);

  return "";
}

/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_long_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
  output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
  return "";
}

/* Output a move of a long double from arm registers to arm registers.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}


/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
  return "";
}

/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
  output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
  return "";
}
8739
8740/* Output a move between double words.
8741   It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
8742   or MEM<-REG and all MEMs must be offsettable addresses.  */
8743const char *
8744output_move_double (rtx *operands)
8745{
8746  enum rtx_code code0 = GET_CODE (operands[0]);
8747  enum rtx_code code1 = GET_CODE (operands[1]);
8748  rtx otherops[3];
8749
8750  if (code0 == REG)
8751    {
8752      int reg0 = REGNO (operands[0]);
8753
8754      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8755
8756      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
8757
8758      switch (GET_CODE (XEXP (operands[1], 0)))
8759	{
8760	case REG:
8761	  output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8762	  break;
8763
8764	case PRE_INC:
8765	  gcc_assert (TARGET_LDRD);
8766	  output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8767	  break;
8768
8769	case PRE_DEC:
8770	  output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8771	  break;
8772
8773	case POST_INC:
8774	  output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8775	  break;
8776
8777	case POST_DEC:
8778	  gcc_assert (TARGET_LDRD);
8779	  output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8780	  break;
8781
8782	case PRE_MODIFY:
8783	case POST_MODIFY:
8784	  otherops[0] = operands[0];
8785	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8786	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8787
8788	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8789	    {
8790	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8791		{
8792		  /* Registers overlap so split out the increment.  */
8793		  output_asm_insn ("add%?\t%1, %1, %2", otherops);
8794		  output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8795		}
8796	      else
8797		{
8798		  /* IWMMXT allows offsets larger than ldrd can handle,
8799		     fix these up with a pair of ldr.  */
8800		  if (GET_CODE (otherops[2]) == CONST_INT
8801		      && (INTVAL(otherops[2]) <= -256
8802			  || INTVAL(otherops[2]) >= 256))
8803		    {
8804		      output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
8805		      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8806		      output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
8807		    }
8808		  else
8809		    output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8810		}
8811	    }
8812	  else
8813	    {
8814	      /* IWMMXT allows offsets larger than ldrd can handle,
8815		 fix these up with a pair of ldr.  */
8816	      if (GET_CODE (otherops[2]) == CONST_INT
8817		  && (INTVAL(otherops[2]) <= -256
8818		      || INTVAL(otherops[2]) >= 256))
8819		{
8820		  otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8821		  output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
8822		  otherops[0] = operands[0];
8823		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
8824		}
8825	      else
8826		/* We only allow constant increments, so this is safe.  */
8827		output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8828	    }
8829	  break;
8830
8831	case LABEL_REF:
8832	case CONST:
8833	  output_asm_insn ("adr%?\t%0, %1", operands);
8834	  output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8835	  break;
8836
8837	default:
8838	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8839			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8840	    {
8841	      otherops[0] = operands[0];
8842	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8843	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8844
8845	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8846		{
8847		  if (GET_CODE (otherops[2]) == CONST_INT)
8848		    {
8849		      switch ((int) INTVAL (otherops[2]))
8850			{
8851			case -8:
8852			  output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8853			  return "";
8854			case -4:
8855			  output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8856			  return "";
8857			case 4:
8858			  output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8859			  return "";
8860			}
8861		    }
8862		  if (TARGET_LDRD
8863		      && (GET_CODE (otherops[2]) == REG
8864			  || (GET_CODE (otherops[2]) == CONST_INT
8865			      && INTVAL (otherops[2]) > -256
8866			      && INTVAL (otherops[2]) < 256)))
8867		    {
8868		      if (reg_overlap_mentioned_p (otherops[0],
8869						   otherops[2]))
8870			{
8871			  /* Swap base and index registers over to
8872			     avoid a conflict.  */
8873			  otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8874			  otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8875			}
8876		      /* If both registers conflict, it will usually
8877			 have been fixed by a splitter.  */
8878		      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8879			{
8880			  output_asm_insn ("add%?\t%1, %1, %2", otherops);
8881			  output_asm_insn ("ldr%?d\t%0, [%1]",
8882					   otherops);
8883			}
8884		      else
8885			output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8886		      return "";
8887		    }
8888
8889		  if (GET_CODE (otherops[2]) == CONST_INT)
8890		    {
8891		      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8892			output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8893		      else
8894			output_asm_insn ("add%?\t%0, %1, %2", otherops);
8895		    }
8896		  else
8897		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
8898		}
8899	      else
8900		output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8901
8902	      return "ldm%?ia\t%0, %M0";
8903	    }
8904	  else
8905	    {
8906	      otherops[1] = adjust_address (operands[1], SImode, 4);
8907	      /* Take care of overlapping base/data reg.  */
8908	      if (reg_mentioned_p (operands[0], operands[1]))
8909		{
8910		  output_asm_insn ("ldr%?\t%0, %1", otherops);
8911		  output_asm_insn ("ldr%?\t%0, %1", operands);
8912		}
8913	      else
8914		{
8915		  output_asm_insn ("ldr%?\t%0, %1", operands);
8916		  output_asm_insn ("ldr%?\t%0, %1", otherops);
8917		}
8918	    }
8919	}
8920    }
8921  else
8922    {
8923      /* Constraints should ensure this.  */
8924      gcc_assert (code0 == MEM && code1 == REG);
8925      gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8926
8927      switch (GET_CODE (XEXP (operands[0], 0)))
8928        {
8929	case REG:
8930	  output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8931	  break;
8932
8933        case PRE_INC:
8934	  gcc_assert (TARGET_LDRD);
8935	  output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8936	  break;
8937
8938        case PRE_DEC:
8939	  output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8940	  break;
8941
8942        case POST_INC:
8943	  output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8944	  break;
8945
8946        case POST_DEC:
8947	  gcc_assert (TARGET_LDRD);
8948	  output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8949	  break;
8950
8951	case PRE_MODIFY:
8952	case POST_MODIFY:
8953	  otherops[0] = operands[1];
8954	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8955	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
8956
	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str instructions.  */
	  if (GET_CODE (otherops[2]) == CONST_INT
	      && (INTVAL (otherops[2]) <= -256
		  || INTVAL (otherops[2]) >= 256))
	    {
	      rtx reg1;
	      reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		  otherops[0] = reg1;
		  output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
		}
	      else
		{
		  otherops[0] = reg1;
		  output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
		  otherops[0] = operands[1];
		  output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		}
	    }
8979	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
8980	    output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
8981	  else
8982	    output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
8983	  break;
8984
8985	case PLUS:
8986	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
8987	  if (GET_CODE (otherops[2]) == CONST_INT)
8988	    {
8989	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
8990		{
8991		case -8:
8992		  output_asm_insn ("stm%?db\t%m0, %M1", operands);
8993		  return "";
8994
8995		case -4:
8996		  output_asm_insn ("stm%?da\t%m0, %M1", operands);
8997		  return "";
8998
8999		case 4:
9000		  output_asm_insn ("stm%?ib\t%m0, %M1", operands);
9001		  return "";
9002		}
9003	    }
9004	  if (TARGET_LDRD
9005	      && (GET_CODE (otherops[2]) == REG
9006		  || (GET_CODE (otherops[2]) == CONST_INT
9007		      && INTVAL (otherops[2]) > -256
9008		      && INTVAL (otherops[2]) < 256)))
9009	    {
9010	      otherops[0] = operands[1];
9011	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
9012	      output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
9013	      return "";
9014	    }
9015	  /* Fall through */
9016
9017        default:
9018	  otherops[0] = adjust_address (operands[0], SImode, 4);
9019	  otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
9020	  output_asm_insn ("str%?\t%1, %0", operands);
9021	  output_asm_insn ("str%?\t%1, %0", otherops);
9022	}
9023    }
9024
9025  return "";
9026}
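
/* For example (with purely illustrative register numbers), a DImode
   load of (mem:DI (reg:SI r2)) into (reg:DI r0) falls into the plain
   REG address case above and is emitted as

       ldmia   r2, {r0-r1}

   while the corresponding store comes out as "stmia r2, {r0-r1}".  */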
9027
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If N is zero and the source and destination registers are the same,
   output nothing.  */
9030const char *
9031output_add_immediate (rtx *operands)
9032{
9033  HOST_WIDE_INT n = INTVAL (operands[2]);
9034
9035  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
9036    {
9037      if (n < 0)
9038	output_multi_immediate (operands,
9039				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
9040				-n);
9041      else
9042	output_multi_immediate (operands,
9043				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
9044				n);
9045    }
9046
9047  return "";
9048}
9049
9050/* Output a multiple immediate operation.
9051   OPERANDS is the vector of operands referred to in the output patterns.
9052   INSTR1 is the output pattern to use for the first constant.
9053   INSTR2 is the output pattern to use for subsequent constants.
9054   IMMED_OP is the index of the constant slot in OPERANDS.
9055   N is the constant value.  */
9056static const char *
9057output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
9058			int immed_op, HOST_WIDE_INT n)
9059{
9060#if HOST_BITS_PER_WIDE_INT > 32
9061  n &= 0xffffffff;
9062#endif
9063
9064  if (n == 0)
9065    {
9066      /* Quick and easy output.  */
9067      operands[immed_op] = const0_rtx;
9068      output_asm_insn (instr1, operands);
9069    }
9070  else
9071    {
9072      int i;
9073      const char * instr = instr1;
9074
9075      /* Note that n is never zero here (which would give no output).  */
9076      for (i = 0; i < 32; i += 2)
9077	{
9078	  if (n & (3 << i))
9079	    {
9080	      operands[immed_op] = GEN_INT (n & (255 << i));
9081	      output_asm_insn (instr, operands);
9082	      instr = instr2;
9083	      i += 6;
9084	    }
9085	}
9086    }
9087
9088  return "";
9089}
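
/* As a worked example (the operands shown are hypothetical), adding
   the constant 0x0000ffff via output_add_immediate is split on 8-bit
   boundaries and emitted as two instructions:

       add     r0, r1, #255            @ 0x000000ff
       add     r0, r0, #65280          @ 0x0000ff00

   Each chunk is an immediate that a single ARM data-processing
   instruction can encode (an 8-bit value rotated by an even amount).  */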
9090
9091/* Return the appropriate ARM instruction for the operation code.
9092   The returned result should not be overwritten.  OP is the rtx of the
9093   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
9094   was shifted.  */
9095const char *
9096arithmetic_instr (rtx op, int shift_first_arg)
9097{
9098  switch (GET_CODE (op))
9099    {
9100    case PLUS:
9101      return "add";
9102
9103    case MINUS:
9104      return shift_first_arg ? "rsb" : "sub";
9105
9106    case IOR:
9107      return "orr";
9108
9109    case XOR:
9110      return "eor";
9111
9112    case AND:
9113      return "and";
9114
9115    default:
9116      gcc_unreachable ();
9117    }
9118}
9119
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the shift rtx.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
   constant shift amount otherwise.  */
9125static const char *
9126shift_op (rtx op, HOST_WIDE_INT *amountp)
9127{
9128  const char * mnem;
9129  enum rtx_code code = GET_CODE (op);
9130
9131  switch (GET_CODE (XEXP (op, 1)))
9132    {
9133    case REG:
9134    case SUBREG:
9135      *amountp = -1;
9136      break;
9137
9138    case CONST_INT:
9139      *amountp = INTVAL (XEXP (op, 1));
9140      break;
9141
9142    default:
9143      gcc_unreachable ();
9144    }
9145
9146  switch (code)
9147    {
9148    case ASHIFT:
9149      mnem = "asl";
9150      break;
9151
9152    case ASHIFTRT:
9153      mnem = "asr";
9154      break;
9155
9156    case LSHIFTRT:
9157      mnem = "lsr";
9158      break;
9159
9160    case ROTATE:
9161      gcc_assert (*amountp != -1);
9162      *amountp = 32 - *amountp;
9163
9164      /* Fall through.  */
9165
9166    case ROTATERT:
9167      mnem = "ror";
9168      break;
9169
9170    case MULT:
9171      /* We never have to worry about the amount being other than a
9172	 power of 2, since this case can never be reloaded from a reg.  */
9173      gcc_assert (*amountp != -1);
9174      *amountp = int_log2 (*amountp);
9175      return "asl";
9176
9177    default:
9178      gcc_unreachable ();
9179    }
9180
9181  if (*amountp != -1)
9182    {
9183      /* This is not 100% correct, but follows from the desire to merge
9184	 multiplication by a power of 2 with the recognizer for a
9185	 shift.  >=32 is not a valid shift for "asl", so we must try and
9186	 output a shift that produces the correct arithmetical result.
9187	 Using lsr #32 is identical except for the fact that the carry bit
9188	 is not set correctly if we set the flags; but we never use the
9189	 carry bit from such an operation, so we can ignore that.  */
9190      if (code == ROTATERT)
9191	/* Rotate is just modulo 32.  */
9192	*amountp &= 31;
9193      else if (*amountp != (*amountp & 31))
9194	{
9195	  if (code == ASHIFT)
9196	    mnem = "lsr";
9197	  *amountp = 32;
9198	}
9199
9200      /* Shifts of 0 are no-ops.  */
9201      if (*amountp == 0)
9202	return NULL;
9203    }
9204
9205  return mnem;
9206}
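
/* For instance, given (ashiftrt:SI (reg) (const_int 3)) this returns
   "asr" and sets *AMOUNTP to 3; a multiplication by 8, which reaches
   here through the MULT case, likewise returns "asl" with *AMOUNTP
   set to 3.  */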
9207
/* Obtain the shift count from POWER, which must be an exact power of
   two; i.e. return log2 (POWER).  */
9209
9210static HOST_WIDE_INT
9211int_log2 (HOST_WIDE_INT power)
9212{
9213  HOST_WIDE_INT shift = 0;
9214
9215  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
9216    {
9217      gcc_assert (shift <= 31);
9218      shift++;
9219    }
9220
9221  return shift;
9222}
9223
9224/* Output a .ascii pseudo-op, keeping track of lengths.  This is
9225   because /bin/as is horribly restrictive.  The judgement about
9226   whether or not each character is 'printable' (and can be output as
9227   is) or not (and must be printed with an octal escape) must be made
9228   with reference to the *host* character set -- the situation is
9229   similar to that discussed in the comments above pp_c_char in
9230   c-pretty-print.c.  */
9231
9232#define MAX_ASCII_LEN 51
9233
9234void
9235output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
9236{
9237  int i;
9238  int len_so_far = 0;
9239
9240  fputs ("\t.ascii\t\"", stream);
9241
9242  for (i = 0; i < len; i++)
9243    {
9244      int c = p[i];
9245
9246      if (len_so_far >= MAX_ASCII_LEN)
9247	{
9248	  fputs ("\"\n\t.ascii\t\"", stream);
9249	  len_so_far = 0;
9250	}
9251
9252      if (ISPRINT (c))
9253	{
9254	  if (c == '\\' || c == '\"')
9255	    {
9256	      putc ('\\', stream);
9257	      len_so_far++;
9258	    }
9259	  putc (c, stream);
9260	  len_so_far++;
9261	}
9262      else
9263	{
9264	  fprintf (stream, "\\%03o", c);
9265	  len_so_far += 4;
9266	}
9267    }
9268
9269  fputs ("\"\n", stream);
9270}
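
/* A short example: the input bytes 'h', 'i', '\n' are emitted as

       .ascii  "hi\012"

   since the newline is not printable on the host and is therefore
   written as a three-digit octal escape.  */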
9271
9272/* Compute the register save mask for registers 0 through 12
9273   inclusive.  This code is used by arm_compute_save_reg_mask.  */
9274
9275static unsigned long
9276arm_compute_save_reg0_reg12_mask (void)
9277{
9278  unsigned long func_type = arm_current_func_type ();
9279  unsigned long save_reg_mask = 0;
9280  unsigned int reg;
9281
9282  if (IS_INTERRUPT (func_type))
9283    {
9284      unsigned int max_reg;
9285      /* Interrupt functions must not corrupt any registers,
9286	 even call clobbered ones.  If this is a leaf function
9287	 we can just examine the registers used by the RTL, but
9288	 otherwise we have to assume that whatever function is
9289	 called might clobber anything, and so we have to save
9290	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
	   bank r13 and r14, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
9298      else
9299	max_reg = 12;
9300
9301      for (reg = 0; reg <= max_reg; reg++)
9302	if (regs_ever_live[reg]
9303	    || (! current_function_is_leaf && call_used_regs [reg]))
9304	  save_reg_mask |= (1 << reg);
9305
9306      /* Also save the pic base register if necessary.  */
9307      if (flag_pic
9308	  && !TARGET_SINGLE_PIC_BASE
9309	  && arm_pic_register != INVALID_REGNUM
9310	  && current_function_uses_pic_offset_table)
9311	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
9312    }
9313  else
9314    {
9315      /* In the normal case we only need to save those registers
9316	 which are call saved and which are used by this function.  */
9317      for (reg = 0; reg <= 10; reg++)
9318	if (regs_ever_live[reg] && ! call_used_regs [reg])
9319	  save_reg_mask |= (1 << reg);
9320
9321      /* Handle the frame pointer as a special case.  */
9322      if (! TARGET_APCS_FRAME
9323	  && ! frame_pointer_needed
9324	  && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
9325	  && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
9326	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
9327
9328      /* If we aren't loading the PIC register,
9329	 don't stack it even though it may be live.  */
9330      if (flag_pic
9331	  && !TARGET_SINGLE_PIC_BASE
9332	  && arm_pic_register != INVALID_REGNUM
9333	  && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
9334	      || current_function_uses_pic_offset_table))
9335	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
9336    }
9337
9338  /* Save registers so the exception handler can modify them.  */
9339  if (current_function_calls_eh_return)
9340    {
9341      unsigned int i;
9342
9343      for (i = 0; ; i++)
9344	{
9345	  reg = EH_RETURN_DATA_REGNO (i);
9346	  if (reg == INVALID_REGNUM)
9347	    break;
9348	  save_reg_mask |= 1 << reg;
9349	}
9350    }
9351
9352  return save_reg_mask;
9353}
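
/* For example, a leaf FIQ handler that uses only r0 and r4 (and has no
   PIC base or EH-return registers to save) gets back
   (1 << 0) | (1 << 4); a non-leaf one would additionally pick up every
   call-clobbered register in the r0 - r7 range.  */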
9354
9355/* Compute a bit mask of which registers need to be
9356   saved on the stack for the current function.  */
9357
9358static unsigned long
9359arm_compute_save_reg_mask (void)
9360{
9361  unsigned int save_reg_mask = 0;
9362  unsigned long func_type = arm_current_func_type ();
9363
9364  if (IS_NAKED (func_type))
9365    /* This should never really happen.  */
9366    return 0;
9367
9368  /* If we are creating a stack frame, then we must save the frame pointer,
9369     IP (which will hold the old stack pointer), LR and the PC.  */
9370  if (frame_pointer_needed)
9371    save_reg_mask |=
9372      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
9373      | (1 << IP_REGNUM)
9374      | (1 << LR_REGNUM)
9375      | (1 << PC_REGNUM);
9376
9377  /* Volatile functions do not return, so there
9378     is no need to save any other registers.  */
9379  if (IS_VOLATILE (func_type))
9380    return save_reg_mask;
9381
9382  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
9383
9384  /* Decide if we need to save the link register.
9385     Interrupt routines have their own banked link register,
9386     so they never need to save it.
9387     Otherwise if we do not use the link register we do not need to save
9388     it.  If we are pushing other registers onto the stack however, we
9389     can save an instruction in the epilogue by pushing the link register
9390     now and then popping it back into the PC.  This incurs extra memory
9391     accesses though, so we only do it when optimizing for size, and only
9392     if we know that we will not need a fancy return sequence.  */
9393  if (regs_ever_live [LR_REGNUM]
9394	  || (save_reg_mask
9395	      && optimize_size
9396	      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9397	      && !current_function_calls_eh_return))
9398    save_reg_mask |= 1 << LR_REGNUM;
9399
9400  if (cfun->machine->lr_save_eliminated)
9401    save_reg_mask &= ~ (1 << LR_REGNUM);
9402
9403  if (TARGET_REALLY_IWMMXT
9404      && ((bit_count (save_reg_mask)
9405	   + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
9406    {
9407      unsigned int reg;
9408
9409      /* The total number of registers that are going to be pushed
9410	 onto the stack is odd.  We need to ensure that the stack
9411	 is 64-bit aligned before we start to save iWMMXt registers,
9412	 and also before we start to create locals.  (A local variable
9413	 might be a double or long long which we will load/store using
9414	 an iWMMXt instruction).  Therefore we need to push another
9415	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
9418      for (reg = 4; reg <= 12; reg++)
9419	if ((save_reg_mask & (1 << reg)) == 0)
9420	  break;
9421
9422      if (reg <= 12)
9423	save_reg_mask |= (1 << reg);
9424      else
9425	{
9426	  cfun->machine->sibcall_blocked = 1;
9427	  save_reg_mask |= (1 << 3);
9428	}
9429    }
9430
9431  return save_reg_mask;
9432}
9433
9434
9435/* Compute a bit mask of which registers need to be
9436   saved on the stack for the current function.  */
9437static unsigned long
9438thumb_compute_save_reg_mask (void)
9439{
9440  unsigned long mask;
9441  unsigned reg;
9442
9443  mask = 0;
9444  for (reg = 0; reg < 12; reg ++)
9445    if (regs_ever_live[reg] && !call_used_regs[reg])
9446      mask |= 1 << reg;
9447
9448  if (flag_pic
9449      && !TARGET_SINGLE_PIC_BASE
9450      && arm_pic_register != INVALID_REGNUM
9451      && current_function_uses_pic_offset_table)
9452    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
9453
9454  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
9455  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
9456    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
9457
9458  /* LR will also be pushed if any lo regs are pushed.  */
9459  if (mask & 0xff || thumb_force_lr_save ())
9460    mask |= (1 << LR_REGNUM);
9461
9462  /* Make sure we have a low work register if we need one.
9463     We will need one if we are going to push a high register,
9464     but we are not currently intending to push a low register.  */
9465  if ((mask & 0xff) == 0
9466      && ((mask & 0x0f00) || TARGET_BACKTRACE))
9467    {
9468      /* Use thumb_find_work_register to choose which register
9469	 we will use.  If the register is live then we will
9470	 have to push it.  Use LAST_LO_REGNUM as our fallback
9471	 choice for the register to select.  */
9472      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
9473
9474      if (! call_used_regs[reg])
9475	mask |= 1 << reg;
9476    }
9477
9478  return mask;
9479}
9480
9481
9482/* Return the number of bytes required to save VFP registers.  */
9483static int
9484arm_get_vfp_saved_size (void)
9485{
9486  unsigned int regno;
9487  int count;
9488  int saved;
9489
9490  saved = 0;
9491  /* Space for saved VFP registers.  */
9492  if (TARGET_HARD_FLOAT && TARGET_VFP)
9493    {
9494      count = 0;
9495      for (regno = FIRST_VFP_REGNUM;
9496	   regno < LAST_VFP_REGNUM;
9497	   regno += 2)
9498	{
9499	  if ((!regs_ever_live[regno] || call_used_regs[regno])
9500	      && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
9501	    {
9502	      if (count > 0)
9503		{
9504		  /* Workaround ARM10 VFPr1 bug.  */
9505		  if (count == 2 && !arm_arch6)
9506		    count++;
9507		  saved += count * 8 + 4;
9508		}
9509	      count = 0;
9510	    }
9511	  else
9512	    count++;
9513	}
9514      if (count > 0)
9515	{
9516	  if (count == 2 && !arm_arch6)
9517	    count++;
9518	  saved += count * 8 + 4;
9519	}
9520    }
9521  return saved;
9522}
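
/* Each step of REGNO above covers one double-precision register (a
   pair of single-precision VFP registers).  So, as a hypothetical
   example on an arm_arch6 core, a run of three consecutive call-saved
   doubles gives COUNT == 3 and contributes 3 * 8 + 4 = 28 bytes:
   8 bytes per double plus 4 for the extra word of the FSTMX/FLDMX save
   format.  On earlier cores a run of exactly two is padded to three to
   avoid the ARM10 VFPr1 bug handled above.  */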
9523
9524
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If REVERSE is nonzero,
   invert the sense of the condition found in OPERAND.  */
9527const char *
9528output_return_instruction (rtx operand, int really_return, int reverse)
9529{
9530  char conditional[10];
9531  char instr[100];
9532  unsigned reg;
9533  unsigned long live_regs_mask;
9534  unsigned long func_type;
9535  arm_stack_offsets *offsets;
9536
9537  func_type = arm_current_func_type ();
9538
9539  if (IS_NAKED (func_type))
9540    return "";
9541
9542  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9543    {
9544      /* If this function was declared non-returning, and we have
9545	 found a tail call, then we have to trust that the called
9546	 function won't return.  */
9547      if (really_return)
9548	{
9549	  rtx ops[2];
9550
9551	  /* Otherwise, trap an attempted return by aborting.  */
9552	  ops[0] = operand;
9553	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
9554				       : "abort");
9555	  assemble_external_libcall (ops[1]);
9556	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
9557	}
9558
9559      return "";
9560    }
9561
9562  gcc_assert (!current_function_calls_alloca || really_return);
9563
9564  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
9565
9566  return_used_this_function = 1;
9567
9568  live_regs_mask = arm_compute_save_reg_mask ();
9569
9570  if (live_regs_mask)
9571    {
9572      const char * return_reg;
9573
9574      /* If we do not have any special requirements for function exit
9575	 (e.g. interworking, or ISR) then we can load the return address
9576	 directly into the PC.  Otherwise we must load it into LR.  */
9577      if (really_return
9578	  && ! TARGET_INTERWORK)
9579	return_reg = reg_names[PC_REGNUM];
9580      else
9581	return_reg = reg_names[LR_REGNUM];
9582
9583      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
9584	{
9585	  /* There are three possible reasons for the IP register
9586	     being saved.  1) a stack frame was created, in which case
9587	     IP contains the old stack pointer, or 2) an ISR routine
9588	     corrupted it, or 3) it was saved to align the stack on
9589	     iWMMXt.  In case 1, restore IP into SP, otherwise just
9590	     restore IP.  */
9591	  if (frame_pointer_needed)
9592	    {
9593	      live_regs_mask &= ~ (1 << IP_REGNUM);
9594	      live_regs_mask |=   (1 << SP_REGNUM);
9595	    }
9596	  else
9597	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
9598	}
9599
      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26-bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
9605      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9606	if (live_regs_mask == (1U << reg))
9607	  break;
9608
9609      if (reg <= LAST_ARM_REGNUM
9610	  && (reg != LR_REGNUM
9611	      || ! really_return
9612	      || ! IS_INTERRUPT (func_type)))
9613	{
9614	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
9615		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
9616	}
9617      else
9618	{
9619	  char *p;
9620	  int first = 1;
9621
9622	  /* Generate the load multiple instruction to restore the
9623	     registers.  Note we can get here, even if
9624	     frame_pointer_needed is true, but only if sp already
9625	     points to the base of the saved core registers.  */
9626	  if (live_regs_mask & (1 << SP_REGNUM))
9627	    {
9628	      unsigned HOST_WIDE_INT stack_adjust;
9629
9630	      offsets = arm_get_frame_offsets ();
9631	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9632	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
9633
9634	      if (stack_adjust && arm_arch5)
9635		sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
9636	      else
9637		{
9638		  /* If we can't use ldmib (SA110 bug),
9639		     then try to pop r3 instead.  */
9640		  if (stack_adjust)
9641		    live_regs_mask |= 1 << 3;
9642		  sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
9643		}
9644	    }
9645	  else
9646	    sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
9647
9648	  p = instr + strlen (instr);
9649
9650	  for (reg = 0; reg <= SP_REGNUM; reg++)
9651	    if (live_regs_mask & (1 << reg))
9652	      {
9653		int l = strlen (reg_names[reg]);
9654
9655		if (first)
9656		  first = 0;
9657		else
9658		  {
9659		    memcpy (p, ", ", 2);
9660		    p += 2;
9661		  }
9662
9663		memcpy (p, "%|", 2);
9664		memcpy (p + 2, reg_names[reg], l);
9665		p += l + 2;
9666	      }
9667
9668	  if (live_regs_mask & (1 << LR_REGNUM))
9669	    {
9670	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
9671	      /* If returning from an interrupt, restore the CPSR.  */
9672	      if (IS_INTERRUPT (func_type))
9673		strcat (p, "^");
9674	    }
9675	  else
9676	    strcpy (p, "}");
9677	}
9678
9679      output_asm_insn (instr, & operand);
9680
9681      /* See if we need to generate an extra instruction to
9682	 perform the actual function return.  */
9683      if (really_return
9684	  && func_type != ARM_FT_INTERWORKED
9685	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
9686	{
9687	  /* The return has already been handled
9688	     by loading the LR into the PC.  */
9689	  really_return = 0;
9690	}
9691    }
9692
9693  if (really_return)
9694    {
9695      switch ((int) ARM_FUNC_TYPE (func_type))
9696	{
9697	case ARM_FT_ISR:
9698	case ARM_FT_FIQ:
9699	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
9700	  break;
9701
9702	case ARM_FT_INTERWORKED:
9703	  sprintf (instr, "bx%s\t%%|lr", conditional);
9704	  break;
9705
9706	case ARM_FT_EXCEPTION:
9707	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
9708	  break;
9709
9710	default:
9711	  /* Use bx if it's available.  */
9712	  if (arm_arch5 || arm_arch4t)
9713	    sprintf (instr, "bx%s\t%%|lr", conditional);
9714	  else
9715	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
9716	  break;
9717	}
9718
9719      output_asm_insn (instr, & operand);
9720    }
9721
9722  return "";
9723}
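
/* For an ordinary function that saved {r4, r5, lr}, needs no
   interworking and is really returning, the code above would emit
   something like

       ldmfd   sp!, {r4, r5, pc}

   popping the saved lr straight into the pc to perform the return.
   The register set shown is only illustrative.  */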
9724
9725/* Write the function name into the code section, directly preceding
9726   the function prologue.
9727
9728   Code will be output similar to this:
9729     t0
9730	 .ascii "arm_poke_function_name", 0
9731	 .align
9732     t1
9733	 .word 0xff000000 + (t1 - t0)
9734     arm_poke_function_name
9735	 mov     ip, sp
9736	 stmfd   sp!, {fp, ip, lr, pc}
9737	 sub     fp, ip, #4
9738
9739   When performing a stack backtrace, code can inspect the value
9740   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, with length ((pc[-3]) & 0x00ffffff).
9744
9745   We assume that pc is declared as a pointer to an unsigned long.
9746
9747   It is of no benefit to output the function name if we are assembling
9748   a leaf function.  These function types will not contain a stack
9749   backtrace structure, therefore it is not possible to determine the
9750   function name.  */
9751void
9752arm_poke_function_name (FILE *stream, const char *name)
9753{
9754  unsigned long alignlength;
9755  unsigned long length;
9756  rtx           x;
9757
9758  length      = strlen (name) + 1;
9759  alignlength = ROUND_UP_WORD (length);
9760
9761  ASM_OUTPUT_ASCII (stream, name, length);
9762  ASM_OUTPUT_ALIGN (stream, 2);
9763  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
9764  assemble_aligned_integer (UNITS_PER_WORD, x);
9765}
9766
9767/* Place some comments into the assembler stream
9768   describing the current function.  */
9769static void
9770arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
9771{
9772  unsigned long func_type;
9773
9774  if (!TARGET_ARM)
9775    {
9776      thumb_output_function_prologue (f, frame_size);
9777      return;
9778    }
9779
9780  /* Sanity check.  */
9781  gcc_assert (!arm_ccfsm_state && !arm_target_insn);
9782
9783  func_type = arm_current_func_type ();
9784
9785  switch ((int) ARM_FUNC_TYPE (func_type))
9786    {
9787    default:
9788    case ARM_FT_NORMAL:
9789      break;
9790    case ARM_FT_INTERWORKED:
9791      asm_fprintf (f, "\t%@ Function supports interworking.\n");
9792      break;
9793    case ARM_FT_ISR:
9794      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
9795      break;
9796    case ARM_FT_FIQ:
9797      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
9798      break;
9799    case ARM_FT_EXCEPTION:
9800      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
9801      break;
9802    }
9803
9804  if (IS_NAKED (func_type))
9805    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
9806
9807  if (IS_VOLATILE (func_type))
9808    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
9809
9810  if (IS_NESTED (func_type))
9811    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
9812
9813  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
9814	       current_function_args_size,
9815	       current_function_pretend_args_size, frame_size);
9816
9817  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
9818	       frame_pointer_needed,
9819	       cfun->machine->uses_anonymous_args);
9820
9821  if (cfun->machine->lr_save_eliminated)
9822    asm_fprintf (f, "\t%@ link register save eliminated.\n");
9823
9824  if (current_function_calls_eh_return)
9825    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
9826
9827#ifdef AOF_ASSEMBLER
9828  if (flag_pic)
9829    asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
9830#endif
9831
9832  return_used_this_function = 0;
9833}
9834
9835const char *
9836arm_output_epilogue (rtx sibling)
9837{
9838  int reg;
9839  unsigned long saved_regs_mask;
9840  unsigned long func_type;
9841  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
9842     frame that is $fp + 4 for a non-variadic function.  */
9843  int floats_offset = 0;
9844  rtx operands[3];
9845  FILE * f = asm_out_file;
9846  unsigned int lrm_count = 0;
9847  int really_return = (sibling == NULL);
9848  int start_reg;
9849  arm_stack_offsets *offsets;
9850
9851  /* If we have already generated the return instruction
9852     then it is futile to generate anything else.  */
9853  if (use_return_insn (FALSE, sibling) && return_used_this_function)
9854    return "";
9855
9856  func_type = arm_current_func_type ();
9857
9858  if (IS_NAKED (func_type))
9859    /* Naked functions don't have epilogues.  */
9860    return "";
9861
9862  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9863    {
9864      rtx op;
9865
9866      /* A volatile function should never return.  Call abort.  */
9867      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
9868      assemble_external_libcall (op);
9869      output_asm_insn ("bl\t%a0", &op);
9870
9871      return "";
9872    }
9873
9874  /* If we are throwing an exception, then we really must be doing a
9875     return, so we can't tail-call.  */
9876  gcc_assert (!current_function_calls_eh_return || really_return);
9877
9878  offsets = arm_get_frame_offsets ();
9879  saved_regs_mask = arm_compute_save_reg_mask ();
9880
9881  if (TARGET_IWMMXT)
9882    lrm_count = bit_count (saved_regs_mask);
9883
9884  floats_offset = offsets->saved_args;
9885  /* Compute how far away the floats will be.  */
9886  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9887    if (saved_regs_mask & (1 << reg))
9888      floats_offset += 4;
9889
9890  if (frame_pointer_needed)
9891    {
9892      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
9893      int vfp_offset = offsets->frame;
9894
9895      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9896	{
9897	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9898	    if (regs_ever_live[reg] && !call_used_regs[reg])
9899	      {
9900		floats_offset += 12;
9901		asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
9902			     reg, FP_REGNUM, floats_offset - vfp_offset);
9903	      }
9904	}
9905      else
9906	{
9907	  start_reg = LAST_FPA_REGNUM;
9908
9909	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9910	    {
9911	      if (regs_ever_live[reg] && !call_used_regs[reg])
9912		{
9913		  floats_offset += 12;
9914
9915		  /* We can't unstack more than four registers at once.  */
9916		  if (start_reg - reg == 3)
9917		    {
9918		      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
9919			           reg, FP_REGNUM, floats_offset - vfp_offset);
9920		      start_reg = reg - 1;
9921		    }
9922		}
9923	      else
9924		{
9925		  if (reg != start_reg)
9926		    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9927				 reg + 1, start_reg - reg,
9928				 FP_REGNUM, floats_offset - vfp_offset);
9929		  start_reg = reg - 1;
9930		}
9931	    }
9932
9933	  /* Just in case the last register checked also needs unstacking.  */
9934	  if (reg != start_reg)
9935	    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9936			 reg + 1, start_reg - reg,
9937			 FP_REGNUM, floats_offset - vfp_offset);
9938	}
9939
9940      if (TARGET_HARD_FLOAT && TARGET_VFP)
9941	{
9942	  int saved_size;
9943
9944	  /* The fldmx insn does not have base+offset addressing modes,
9945	     so we use IP to hold the address.  */
9946	  saved_size = arm_get_vfp_saved_size ();
9947
9948	  if (saved_size > 0)
9949	    {
9950	      floats_offset += saved_size;
9951	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
9952			   FP_REGNUM, floats_offset - vfp_offset);
9953	    }
9954	  start_reg = FIRST_VFP_REGNUM;
9955	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9956	    {
9957	      if ((!regs_ever_live[reg] || call_used_regs[reg])
9958		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9959		{
9960		  if (start_reg != reg)
9961		    arm_output_fldmx (f, IP_REGNUM,
9962				      (start_reg - FIRST_VFP_REGNUM) / 2,
9963				      (reg - start_reg) / 2);
9964		  start_reg = reg + 2;
9965		}
9966	    }
9967	  if (start_reg != reg)
9968	    arm_output_fldmx (f, IP_REGNUM,
9969			      (start_reg - FIRST_VFP_REGNUM) / 2,
9970			      (reg - start_reg) / 2);
9971	}
9972
9973      if (TARGET_IWMMXT)
9974	{
	  /* The frame pointer is guaranteed not to be double-word aligned,
	     because it is set to (old_stack_pointer - 4) and the
	     old_stack_pointer was double-word aligned.  Thus the offset to
	     the iWMMXt registers to be loaded must likewise not be a
	     multiple of 8, so that the resultant address *is* double-word
	     aligned.  We can ignore floats_offset since that was already
	     included in the live_regs_mask.  */
9982	  lrm_count += (lrm_count % 2 ? 2 : 1);
9983
9984	  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
9985	    if (regs_ever_live[reg] && !call_used_regs[reg])
9986	      {
9987		asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
9988			     reg, FP_REGNUM, lrm_count * 4);
9989		lrm_count += 2;
9990	      }
9991	}
9992
      /* saved_regs_mask should contain the IP, which at the time of stack
	 frame generation actually contains the old stack pointer.  So a
	 quick way to unwind the stack is just to pop the IP register
	 directly into the stack pointer.  */
9997      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
9998      saved_regs_mask &= ~ (1 << IP_REGNUM);
9999      saved_regs_mask |=   (1 << SP_REGNUM);
10000
10001      /* There are two registers left in saved_regs_mask - LR and PC.  We
10002	 only need to restore the LR register (the return address), but to
10003	 save time we can load it directly into the PC, unless we need a
10004	 special function exit sequence, or we are not really returning.  */
10005      if (really_return
10006	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10007	  && !current_function_calls_eh_return)
10008	/* Delete the LR from the register mask, so that the LR on
10009	   the stack is loaded into the PC in the register mask.  */
10010	saved_regs_mask &= ~ (1 << LR_REGNUM);
10011      else
10012	saved_regs_mask &= ~ (1 << PC_REGNUM);
10013
10014      /* We must use SP as the base register, because SP is one of the
10015         registers being restored.  If an interrupt or page fault
10016         happens in the ldm instruction, the SP might or might not
10017         have been restored.  That would be bad, as then SP will no
10018         longer indicate the safe area of stack, and we can get stack
10019         corruption.  Using SP as the base register means that it will
10020         be reset correctly to the original value, should an interrupt
10021         occur.  If the stack pointer already points at the right
10022         place, then omit the subtraction.  */
10023      if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
10024	  || current_function_calls_alloca)
10025	asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
10026		     4 * bit_count (saved_regs_mask));
10027      print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
10028
10029      if (IS_INTERRUPT (func_type))
10030	/* Interrupt handlers will have pushed the
10031	   IP onto the stack, so restore it now.  */
10032	print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
10033    }
10034  else
10035    {
10036      /* Restore stack pointer if necessary.  */
10037      if (offsets->outgoing_args != offsets->saved_regs)
10038	{
10039	  operands[0] = operands[1] = stack_pointer_rtx;
10040	  operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
10041	  output_add_immediate (operands);
10042	}
10043
10044      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10045	{
10046	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
10047	    if (regs_ever_live[reg] && !call_used_regs[reg])
10048	      asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
10049			   reg, SP_REGNUM);
10050	}
10051      else
10052	{
10053	  start_reg = FIRST_FPA_REGNUM;
10054
10055	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
10056	    {
10057	      if (regs_ever_live[reg] && !call_used_regs[reg])
10058		{
10059		  if (reg - start_reg == 3)
10060		    {
10061		      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
10062				   start_reg, SP_REGNUM);
10063		      start_reg = reg + 1;
10064		    }
10065		}
10066	      else
10067		{
10068		  if (reg != start_reg)
10069		    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
10070				 start_reg, reg - start_reg,
10071				 SP_REGNUM);
10072
10073		  start_reg = reg + 1;
10074		}
10075	    }
10076
10077	  /* Just in case the last register checked also needs unstacking.  */
10078	  if (reg != start_reg)
10079	    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
10080			 start_reg, reg - start_reg, SP_REGNUM);
10081	}
10082
10083      if (TARGET_HARD_FLOAT && TARGET_VFP)
10084	{
10085	  start_reg = FIRST_VFP_REGNUM;
10086	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10087	    {
10088	      if ((!regs_ever_live[reg] || call_used_regs[reg])
10089		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10090		{
10091		  if (start_reg != reg)
10092		    arm_output_fldmx (f, SP_REGNUM,
10093				      (start_reg - FIRST_VFP_REGNUM) / 2,
10094				      (reg - start_reg) / 2);
10095		  start_reg = reg + 2;
10096		}
10097	    }
10098	  if (start_reg != reg)
10099	    arm_output_fldmx (f, SP_REGNUM,
10100			      (start_reg - FIRST_VFP_REGNUM) / 2,
10101			      (reg - start_reg) / 2);
10102	}
10103      if (TARGET_IWMMXT)
10104	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
10105	  if (regs_ever_live[reg] && !call_used_regs[reg])
10106	    asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
10107
10108      /* If we can, restore the LR into the PC.  */
10109      if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10110	  && really_return
10111	  && current_function_pretend_args_size == 0
10112	  && saved_regs_mask & (1 << LR_REGNUM)
10113	  && !current_function_calls_eh_return)
10114	{
10115	  saved_regs_mask &= ~ (1 << LR_REGNUM);
10116	  saved_regs_mask |=   (1 << PC_REGNUM);
10117	}
10118
10119      /* Load the registers off the stack.  If we only have one register
10120	 to load use the LDR instruction - it is faster.  */
10121      if (saved_regs_mask == (1 << LR_REGNUM))
10122	{
10123	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
10124	}
10125      else if (saved_regs_mask)
10126	{
10127	  if (saved_regs_mask & (1 << SP_REGNUM))
10128	    /* Note - write back to the stack register is not enabled
10129	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
10130	       in the list of registers and if we add writeback the
10131	       instruction becomes UNPREDICTABLE.  */
10132	    print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
10133	  else
10134	    print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
10135	}
10136
10137      if (current_function_pretend_args_size)
10138	{
10139	  /* Unwind the pre-pushed regs.  */
10140	  operands[0] = operands[1] = stack_pointer_rtx;
10141	  operands[2] = GEN_INT (current_function_pretend_args_size);
10142	  output_add_immediate (operands);
10143	}
10144    }
10145
10146  /* We may have already restored PC directly from the stack.  */
10147  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
10148    return "";
10149
10150  /* Stack adjustment for exception handler.  */
10151  if (current_function_calls_eh_return)
10152    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
10153		 ARM_EH_STACKADJ_REGNUM);
10154
10155  /* Generate the return instruction.  */
10156  switch ((int) ARM_FUNC_TYPE (func_type))
10157    {
10158    case ARM_FT_ISR:
10159    case ARM_FT_FIQ:
10160      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
10161      break;
10162
10163    case ARM_FT_EXCEPTION:
10164      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
10165      break;
10166
10167    case ARM_FT_INTERWORKED:
10168      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
10169      break;
10170
10171    default:
10172      if (arm_arch5 || arm_arch4t)
10173	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
10174      else
10175	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
10176      break;
10177    }
10178
10179  return "";
10180}
10181
10182static void
10183arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10184			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
10185{
10186  arm_stack_offsets *offsets;
10187
10188  if (TARGET_THUMB)
10189    {
10190      int regno;
10191
10192      /* Emit any call-via-reg trampolines that are needed for v4t support
10193	 of call_reg and call_value_reg type insns.  */
10194      for (regno = 0; regno < LR_REGNUM; regno++)
10195	{
10196	  rtx label = cfun->machine->call_via[regno];
10197
10198	  if (label != NULL)
10199	    {
10200	      switch_to_section (function_section (current_function_decl));
10201	      targetm.asm_out.internal_label (asm_out_file, "L",
10202					      CODE_LABEL_NUMBER (label));
10203	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
10204	    }
10205	}
10206
10207      /* ??? Probably not safe to set this here, since it assumes that a
10208	 function will be emitted as assembly immediately after we generate
10209	 RTL for it.  This does not happen for inline functions.  */
10210      return_used_this_function = 0;
10211    }
10212  else
10213    {
10214      /* We need to take into account any stack-frame rounding.  */
10215      offsets = arm_get_frame_offsets ();
10216
10217      gcc_assert (!use_return_insn (FALSE, NULL)
10218		  || !return_used_this_function
10219		  || offsets->saved_regs == offsets->outgoing_args
10220		  || frame_pointer_needed);
10221
10222      /* Reset the ARM-specific per-function variables.  */
10223      after_arm_reorg = 0;
10224    }
10225}
10226
10227/* Generate and emit an insn that we will recognize as a push_multi.
10228   Unfortunately, since this insn does not reflect very well the actual
10229   semantics of the operation, we need to annotate the insn for the benefit
10230   of DWARF2 frame unwind information.  */
10231static rtx
10232emit_multi_reg_push (unsigned long mask)
10233{
10234  int num_regs = 0;
10235  int num_dwarf_regs;
10236  int i, j;
10237  rtx par;
10238  rtx dwarf;
10239  int dwarf_par_index;
10240  rtx tmp, reg;
10241
10242  for (i = 0; i <= LAST_ARM_REGNUM; i++)
10243    if (mask & (1 << i))
10244      num_regs++;
10245
10246  gcc_assert (num_regs && num_regs <= 16);
10247
10248  /* We don't record the PC in the dwarf frame information.  */
10249  num_dwarf_regs = num_regs;
10250  if (mask & (1 << PC_REGNUM))
10251    num_dwarf_regs--;
10252
10253  /* For the body of the insn we are going to generate an UNSPEC in
10254     parallel with several USEs.  This allows the insn to be recognized
10255     by the push_multi pattern in the arm.md file.  The insn looks
10256     something like this:
10257
10258       (parallel [
10259           (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
10260	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
10261           (use (reg:SI 11 fp))
10262           (use (reg:SI 12 ip))
10263           (use (reg:SI 14 lr))
10264           (use (reg:SI 15 pc))
10265        ])
10266
10267     For the frame note however, we try to be more explicit and actually
10268     show each register being stored into the stack frame, plus a (single)
10269     decrement of the stack pointer.  We do it this way in order to be
10270     friendly to the stack unwinding code, which only wants to see a single
10271     stack decrement per instruction.  The RTL we generate for the note looks
10272     something like this:
10273
10274      (sequence [
10275           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
10276           (set (mem:SI (reg:SI sp)) (reg:SI r4))
10277           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
10278           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
10279           (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
10280        ])
10281
      This sequence is used both by the code to support stack unwinding for
      exception handlers and the code to generate dwarf2 frame debugging.  */
10284
10285  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
10286  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
10287  dwarf_par_index = 1;
10288
10289  for (i = 0; i <= LAST_ARM_REGNUM; i++)
10290    {
10291      if (mask & (1 << i))
10292	{
10293	  reg = gen_rtx_REG (SImode, i);
10294
10295	  XVECEXP (par, 0, 0)
10296	    = gen_rtx_SET (VOIDmode,
10297			   gen_frame_mem (BLKmode,
10298					  gen_rtx_PRE_DEC (BLKmode,
10299							   stack_pointer_rtx)),
10300			   gen_rtx_UNSPEC (BLKmode,
10301					   gen_rtvec (1, reg),
10302					   UNSPEC_PUSH_MULT));
10303
10304	  if (i != PC_REGNUM)
10305	    {
10306	      tmp = gen_rtx_SET (VOIDmode,
10307				 gen_frame_mem (SImode, stack_pointer_rtx),
10308				 reg);
10309	      RTX_FRAME_RELATED_P (tmp) = 1;
10310	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
10311	      dwarf_par_index++;
10312	    }
10313
10314	  break;
10315	}
10316    }
10317
10318  for (j = 1, i++; j < num_regs; i++)
10319    {
10320      if (mask & (1 << i))
10321	{
10322	  reg = gen_rtx_REG (SImode, i);
10323
10324	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
10325
10326	  if (i != PC_REGNUM)
10327	    {
10328	      tmp
10329		= gen_rtx_SET (VOIDmode,
10330			       gen_frame_mem (SImode,
10331					      plus_constant (stack_pointer_rtx,
10332							     4 * j)),
10333			       reg);
10334	      RTX_FRAME_RELATED_P (tmp) = 1;
10335	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
10336	    }
10337
10338	  j++;
10339	}
10340    }
10341
10342  par = emit_insn (par);
10343
10344  tmp = gen_rtx_SET (VOIDmode,
10345		     stack_pointer_rtx,
10346		     plus_constant (stack_pointer_rtx, -4 * num_regs));
10347  RTX_FRAME_RELATED_P (tmp) = 1;
10348  XVECEXP (dwarf, 0, 0) = tmp;
10349
10350  REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
10351				       REG_NOTES (par));
10352  return par;
10353}
10354
10355/* Calculate the size of the return value that is passed in registers.  */
10356static int
10357arm_size_return_regs (void)
10358{
10359  enum machine_mode mode;
10360
10361  if (current_function_return_rtx != 0)
10362    mode = GET_MODE (current_function_return_rtx);
10363  else
10364    mode = DECL_MODE (DECL_RESULT (current_function_decl));
10365
10366  return GET_MODE_SIZE (mode);
10367}
10368
10369static rtx
10370emit_sfm (int base_reg, int count)
10371{
10372  rtx par;
10373  rtx dwarf;
10374  rtx tmp, reg;
10375  int i;
10376
10377  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10378  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10379
10380  reg = gen_rtx_REG (XFmode, base_reg++);
10381
10382  XVECEXP (par, 0, 0)
10383    = gen_rtx_SET (VOIDmode,
10384		   gen_frame_mem (BLKmode,
10385				  gen_rtx_PRE_DEC (BLKmode,
10386						   stack_pointer_rtx)),
10387		   gen_rtx_UNSPEC (BLKmode,
10388				   gen_rtvec (1, reg),
10389				   UNSPEC_PUSH_MULT));
10390  tmp = gen_rtx_SET (VOIDmode,
10391		     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
10392  RTX_FRAME_RELATED_P (tmp) = 1;
10393  XVECEXP (dwarf, 0, 1) = tmp;
10394
10395  for (i = 1; i < count; i++)
10396    {
10397      reg = gen_rtx_REG (XFmode, base_reg++);
10398      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10399
10400      tmp = gen_rtx_SET (VOIDmode,
10401			 gen_frame_mem (XFmode,
10402					plus_constant (stack_pointer_rtx,
10403						       i * 12)),
10404			 reg);
10405      RTX_FRAME_RELATED_P (tmp) = 1;
10406      XVECEXP (dwarf, 0, i + 1) = tmp;
10407    }
10408
10409  tmp = gen_rtx_SET (VOIDmode,
10410		     stack_pointer_rtx,
10411		     plus_constant (stack_pointer_rtx, -12 * count));
10412
10413  RTX_FRAME_RELATED_P (tmp) = 1;
10414  XVECEXP (dwarf, 0, 0) = tmp;
10415
10416  par = emit_insn (par);
10417  REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
10418				       REG_NOTES (par));
10419  return par;
10420}
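
/* The shape of the insn and its unwind note mirror emit_multi_reg_push
   above, but for 12-byte XFmode saves: with COUNT == 2, for instance,
   the note records a single 24-byte decrement of sp with the two
   registers stored at sp and sp + 12.  */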
10421
10422
10423/* Return true if the current function needs to save/restore LR.  */
10424
10425static bool
10426thumb_force_lr_save (void)
10427{
10428  return !cfun->machine->lr_save_eliminated
10429	 && (!leaf_function_p ()
10430	     || thumb_far_jump_used_p ()
10431	     || regs_ever_live [LR_REGNUM]);
10432}
10433
10434
10435/* Compute the distance from register FROM to register TO.
10436   These can be the arg pointer (26), the soft frame pointer (25),
10437   the stack pointer (13) or the hard frame pointer (11).
10438   In thumb mode r7 is used as the soft frame pointer, if needed.
10439   Typical stack layout looks like this:
10440
       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
10468
10469  For a given function some or all of these stack components
10470  may not be needed, giving rise to the possibility of
10471  eliminating some of the registers.
10472
10473  The values returned by this function must reflect the behavior
10474  of arm_expand_prologue() and arm_compute_save_reg_mask().
10475
10476  The sign of the number returned reflects the direction of stack
10477  growth, so the values are positive for all eliminations except
10478  from the soft frame pointer to the hard frame pointer.
10479
10480  SFP may point just inside the local variables block to ensure correct
10481  alignment.  */
10482
10483
10484/* Calculate stack offsets.  These are used to calculate register elimination
10485   offsets and in prologue/epilogue code.  */
10486
10487static arm_stack_offsets *
10488arm_get_frame_offsets (void)
10489{
10490  struct arm_stack_offsets *offsets;
10491  unsigned long func_type;
10492  int leaf;
10493  int saved;
10494  HOST_WIDE_INT frame_size;
10495
10496  offsets = &cfun->machine->stack_offsets;
10497
10498  /* We need to know if we are a leaf function.  Unfortunately, it
10499     is possible to be called after start_sequence has been called,
10500     which causes get_insns to return the insns for the sequence,
10501     not the function, which will cause leaf_function_p to return
10502     the incorrect result.
10503
     To work around this, we use the fact that we only ever need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */
10507
10508  if (reload_completed)
10509    return offsets;
10510
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of the
     preceding data.  */
10513  frame_size = ROUND_UP_WORD (get_frame_size ());
10514
10515  leaf = leaf_function_p ();
10516
10517  /* Space for variadic functions.  */
10518  offsets->saved_args = current_function_pretend_args_size;
10519
10520  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
10521
10522  if (TARGET_ARM)
10523    {
10524      unsigned int regno;
10525
10526      saved = bit_count (arm_compute_save_reg_mask ()) * 4;
10527
10528      /* We know that SP will be doubleword aligned on entry, and we must
10529	 preserve that condition at any subroutine call.  We also require the
10530	 soft frame pointer to be doubleword aligned.  */
10531
10532      if (TARGET_REALLY_IWMMXT)
10533	{
10534	  /* Check for the call-saved iWMMXt registers.  */
10535	  for (regno = FIRST_IWMMXT_REGNUM;
10536	       regno <= LAST_IWMMXT_REGNUM;
10537	       regno++)
10538	    if (regs_ever_live [regno] && ! call_used_regs [regno])
10539	      saved += 8;
10540	}
10541
10542      func_type = arm_current_func_type ();
10543      if (! IS_VOLATILE (func_type))
10544	{
	  /* Space for saved FPA registers.  */
	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (regs_ever_live[regno] && ! call_used_regs[regno])
	      saved += 12;
10549
10550	  /* Space for saved VFP registers.  */
10551	  if (TARGET_HARD_FLOAT && TARGET_VFP)
10552	    saved += arm_get_vfp_saved_size ();
10553	}
10554    }
10555  else /* TARGET_THUMB */
10556    {
10557      saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
10558      if (TARGET_BACKTRACE)
10559	saved += 16;
10560    }
10561
10562  /* Saved registers include the stack frame.  */
10563  offsets->saved_regs = offsets->saved_args + saved;
10564  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10565  /* A leaf function does not need any stack alignment if it has nothing
10566     on the stack.  */
10567  if (leaf && frame_size == 0)
10568    {
10569      offsets->outgoing_args = offsets->soft_frame;
10570      offsets->locals_base = offsets->soft_frame;
10571      return offsets;
10572    }
10573
10574  /* Ensure SFP has the correct alignment.  */
10575  if (ARM_DOUBLEWORD_ALIGN
10576      && (offsets->soft_frame & 7))
10577    offsets->soft_frame += 4;
10578
10579  offsets->locals_base = offsets->soft_frame + frame_size;
10580  offsets->outgoing_args = (offsets->locals_base
10581			    + current_function_outgoing_args_size);
10582
10583  if (ARM_DOUBLEWORD_ALIGN)
10584    {
10585      /* Ensure SP remains doubleword aligned.  */
10586      if (offsets->outgoing_args & 7)
10587	offsets->outgoing_args += 4;
10588      gcc_assert (!(offsets->outgoing_args & 7));
10589    }
10590
10591  return offsets;
10592}
10593
10594
10595/* Calculate the relative offsets for the different stack pointers.  Positive
10596   offsets are in the direction of stack growth.  */
10597
10598HOST_WIDE_INT
10599arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10600{
10601  arm_stack_offsets *offsets;
10602
10603  offsets = arm_get_frame_offsets ();
10604
10605  /* OK, now we have enough information to compute the distances.
10606     There must be an entry in these switch tables for each pair
10607     of registers in ELIMINABLE_REGS, even if some of the entries
10608     seem to be redundant or useless.  */
10609  switch (from)
10610    {
10611    case ARG_POINTER_REGNUM:
10612      switch (to)
10613	{
10614	case THUMB_HARD_FRAME_POINTER_REGNUM:
10615	  return 0;
10616
10617	case FRAME_POINTER_REGNUM:
10618	  /* This is the reverse of the soft frame pointer
10619	     to hard frame pointer elimination below.  */
10620	  return offsets->soft_frame - offsets->saved_args;
10621
10622	case ARM_HARD_FRAME_POINTER_REGNUM:
10623	  /* If there is no stack frame then the hard
10624	     frame pointer and the arg pointer coincide.  */
10625	  if (offsets->frame == offsets->saved_regs)
10626	    return 0;
10627	  /* FIXME:  Not sure about this.  Maybe we should always return 0 ?  */
10628	  return (frame_pointer_needed
10629		  && cfun->static_chain_decl != NULL
10630		  && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10631
10632	case STACK_POINTER_REGNUM:
10633	  /* If nothing has been pushed on the stack at all
10634	     then this will return -4.  This *is* correct!  */
10635	  return offsets->outgoing_args - (offsets->saved_args + 4);
10636
10637	default:
10638	  gcc_unreachable ();
10639	}
10640      gcc_unreachable ();
10641
10642    case FRAME_POINTER_REGNUM:
10643      switch (to)
10644	{
10645	case THUMB_HARD_FRAME_POINTER_REGNUM:
10646	  return 0;
10647
10648	case ARM_HARD_FRAME_POINTER_REGNUM:
10649	  /* The hard frame pointer points to the top entry in the
10650	     stack frame.  The soft frame pointer to the bottom entry
10651	     in the stack frame.  If there is no stack frame at all,
10652	     then they are identical.  */
10653
10654	  return offsets->frame - offsets->soft_frame;
10655
10656	case STACK_POINTER_REGNUM:
10657	  return offsets->outgoing_args - offsets->soft_frame;
10658
10659	default:
10660	  gcc_unreachable ();
10661	}
10662      gcc_unreachable ();
10663
10664    default:
10665      /* You cannot eliminate from the stack pointer.
10666	 In theory you could eliminate from the hard frame
10667	 pointer to the stack pointer, but this will never
10668	 happen, since if a stack frame is not needed the
10669	 hard frame pointer will never be used.  */
10670      gcc_unreachable ();
10671    }
10672}
10673
10674
10675/* Generate the prologue instructions for entry into an ARM function.  */
10676void
10677arm_expand_prologue (void)
10678{
10679  int reg;
10680  rtx amount;
10681  rtx insn;
10682  rtx ip_rtx;
10683  unsigned long live_regs_mask;
10684  unsigned long func_type;
10685  int fp_offset = 0;
10686  int saved_pretend_args = 0;
10687  int saved_regs = 0;
10688  unsigned HOST_WIDE_INT args_to_push;
10689  arm_stack_offsets *offsets;
10690
10691  func_type = arm_current_func_type ();
10692
10693  /* Naked functions don't have prologues.  */
10694  if (IS_NAKED (func_type))
10695    return;
10696
10697  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
10698  args_to_push = current_function_pretend_args_size;
10699
  /* Compute which registers we will have to save onto the stack.  */
10701  live_regs_mask = arm_compute_save_reg_mask ();
10702
10703  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10704
10705  if (frame_pointer_needed)
10706    {
10707      if (IS_INTERRUPT (func_type))
10708	{
10709	  /* Interrupt functions must not corrupt any registers.
10710	     Creating a frame pointer however, corrupts the IP
10711	     register, so we must push it first.  */
10712	  insn = emit_multi_reg_push (1 << IP_REGNUM);
10713
10714	  /* Do not set RTX_FRAME_RELATED_P on this insn.
10715	     The dwarf stack unwinding code only wants to see one
10716	     stack decrement per function, and this is not it.  If
10717	     this instruction is labeled as being part of the frame
10718	     creation sequence then dwarf2out_frame_debug_expr will
10719	     die when it encounters the assignment of IP to FP
10720	     later on, since the use of SP here establishes SP as
10721	     the CFA register and not IP.
10722
10723	     Anyway this instruction is not really part of the stack
10724	     frame creation although it is part of the prologue.  */
10725	}
10726      else if (IS_NESTED (func_type))
10727	{
	  /* The static chain register is the same as the IP register,
	     which is used as a scratch register during stack frame
	     creation.  To get around this we need to find somewhere
	     to store IP whilst the frame is being created.  We try
	     the following places in order:
10733
10734	       1. The last argument register.
10735	       2. A slot on the stack above the frame.  (This only
10736	          works if the function is not a varargs function).
10737	       3. Register r3, after pushing the argument registers
10738	          onto the stack.
10739
10740	     Note - we only need to tell the dwarf2 backend about the SP
10741	     adjustment in the second variant; the static chain register
10742	     doesn't need to be unwound, as it doesn't contain a value
10743	     inherited from the caller.  */
10744
10745	  if (regs_ever_live[3] == 0)
10746	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
10747	  else if (args_to_push == 0)
10748	    {
10749	      rtx dwarf;
10750
10751	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10752	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
10753	      fp_offset = 4;
10754
10755	      /* Just tell the dwarf backend that we adjusted SP.  */
10756	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10757				   plus_constant (stack_pointer_rtx,
10758						  -fp_offset));
10759	      RTX_FRAME_RELATED_P (insn) = 1;
10760	      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10761						    dwarf, REG_NOTES (insn));
10762	    }
10763	  else
10764	    {
10765	      /* Store the args on the stack.  */
10766	      if (cfun->machine->uses_anonymous_args)
10767		insn = emit_multi_reg_push
10768		  ((0xf0 >> (args_to_push / 4)) & 0xf);
10769	      else
10770		insn = emit_insn
10771		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10772			       GEN_INT (- args_to_push)));
10773
10774	      RTX_FRAME_RELATED_P (insn) = 1;
10775
10776	      saved_pretend_args = 1;
10777	      fp_offset = args_to_push;
10778	      args_to_push = 0;
10779
10780	      /* Now reuse r3 to preserve IP.  */
10781	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
10782	    }
10783	}
10784
10785      insn = emit_set_insn (ip_rtx,
10786			    plus_constant (stack_pointer_rtx, fp_offset));
10787      RTX_FRAME_RELATED_P (insn) = 1;
10788    }
10789
10790  if (args_to_push)
10791    {
10792      /* Push the argument registers, or reserve space for them.  */
10793      if (cfun->machine->uses_anonymous_args)
10794	insn = emit_multi_reg_push
10795	  ((0xf0 >> (args_to_push / 4)) & 0xf);
10796      else
10797	insn = emit_insn
10798	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10799		       GEN_INT (- args_to_push)));
10800      RTX_FRAME_RELATED_P (insn) = 1;
10801    }
10802
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we are not creating a stack frame
     (which would involve an extra push of IP and a pop in the
     epilogue), then subtracting four from LR now will mean that
     the function return can be done with a single instruction.  */
10808  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10809      && (live_regs_mask & (1 << LR_REGNUM)) != 0
10810      && ! frame_pointer_needed)
10811    {
10812      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
10813
10814      emit_set_insn (lr, plus_constant (lr, -4));
10815    }
10816
10817  if (live_regs_mask)
10818    {
10819      insn = emit_multi_reg_push (live_regs_mask);
10820      saved_regs += bit_count (live_regs_mask) * 4;
10821      RTX_FRAME_RELATED_P (insn) = 1;
10822    }
10823
10824  if (TARGET_IWMMXT)
10825    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10826      if (regs_ever_live[reg] && ! call_used_regs [reg])
10827	{
10828	  insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10829	  insn = gen_frame_mem (V2SImode, insn);
10830	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
10831	  RTX_FRAME_RELATED_P (insn) = 1;
10832	  saved_regs += 8;
10833	}
10834
10835  if (! IS_VOLATILE (func_type))
10836    {
10837      int start_reg;
10838
10839      /* Save any floating point call-saved registers used by this
10840	 function.  */
10841      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10842	{
10843	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10844	    if (regs_ever_live[reg] && !call_used_regs[reg])
10845	      {
10846		insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10847		insn = gen_frame_mem (XFmode, insn);
10848		insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
10849		RTX_FRAME_RELATED_P (insn) = 1;
10850		saved_regs += 12;
10851	      }
10852	}
10853      else
10854	{
10855	  start_reg = LAST_FPA_REGNUM;
10856
10857	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10858	    {
10859	      if (regs_ever_live[reg] && !call_used_regs[reg])
10860		{
10861		  if (start_reg - reg == 3)
10862		    {
10863		      insn = emit_sfm (reg, 4);
10864		      RTX_FRAME_RELATED_P (insn) = 1;
10865		      saved_regs += 48;
10866		      start_reg = reg - 1;
10867		    }
10868		}
10869	      else
10870		{
10871		  if (start_reg != reg)
10872		    {
10873		      insn = emit_sfm (reg + 1, start_reg - reg);
10874		      RTX_FRAME_RELATED_P (insn) = 1;
10875		      saved_regs += (start_reg - reg) * 12;
10876		    }
10877		  start_reg = reg - 1;
10878		}
10879	    }
10880
10881	  if (start_reg != reg)
10882	    {
10883	      insn = emit_sfm (reg + 1, start_reg - reg);
10884	      saved_regs += (start_reg - reg) * 12;
10885	      RTX_FRAME_RELATED_P (insn) = 1;
10886	    }
10887	}
10888      if (TARGET_HARD_FLOAT && TARGET_VFP)
10889	{
10890	  start_reg = FIRST_VFP_REGNUM;
10891
10892 	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10893	    {
10894	      if ((!regs_ever_live[reg] || call_used_regs[reg])
10895		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10896		{
10897		  if (start_reg != reg)
10898		    saved_regs += vfp_emit_fstmx (start_reg,
10899						  (reg - start_reg) / 2);
10900		  start_reg = reg + 2;
10901		}
10902	    }
10903	  if (start_reg != reg)
10904	    saved_regs += vfp_emit_fstmx (start_reg,
10905					  (reg - start_reg) / 2);
10906	}
10907    }
10908
10909  if (frame_pointer_needed)
10910    {
10911      /* Create the new frame pointer.  */
10912      insn = GEN_INT (-(4 + args_to_push + fp_offset));
10913      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10914      RTX_FRAME_RELATED_P (insn) = 1;
10915
10916      if (IS_NESTED (func_type))
10917	{
10918	  /* Recover the static chain register.  */
10919	  if (regs_ever_live [3] == 0
10920	      || saved_pretend_args)
10921	    insn = gen_rtx_REG (SImode, 3);
10922	  else /* if (current_function_pretend_args_size == 0) */
10923	    {
10924	      insn = plus_constant (hard_frame_pointer_rtx, 4);
10925	      insn = gen_frame_mem (SImode, insn);
10926	    }
10927
10928	  emit_set_insn (ip_rtx, insn);
10929	  /* Add a USE to stop propagate_one_insn() from barfing.  */
10930	  emit_insn (gen_prologue_use (ip_rtx));
10931	}
10932    }
10933
10934  offsets = arm_get_frame_offsets ();
10935  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10936    {
10937      /* This add can produce multiple insns for a large constant, so we
10938	 need to get tricky.  */
10939      rtx last = get_last_insn ();
10940
10941      amount = GEN_INT (offsets->saved_args + saved_regs
10942			- offsets->outgoing_args);
10943
10944      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10945				    amount));
10946      do
10947	{
10948	  last = last ? NEXT_INSN (last) : get_insns ();
10949	  RTX_FRAME_RELATED_P (last) = 1;
10950	}
10951      while (last != insn);
10952
10953      /* If the frame pointer is needed, emit a special barrier that
10954	 will prevent the scheduler from moving stores to the frame
10955	 before the stack adjustment.  */
10956      if (frame_pointer_needed)
10957	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10958					 hard_frame_pointer_rtx));
10959    }
10960
10961
10962  if (flag_pic && arm_pic_register != INVALID_REGNUM)
10963    arm_load_pic_register (0UL);
10964
10965  /* If we are profiling, make sure no instructions are scheduled before
10966     the call to mcount.  Similarly if the user has requested no
10967     scheduling in the prolog.  Similarly if we want non-call exceptions
10968     using the EABI unwinder, to prevent faulting instructions from being
10969     swapped with a stack adjustment.  */
10970  if (current_function_profile || !TARGET_SCHED_PROLOG
10971      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
10972    emit_insn (gen_blockage ());
10973
10974  /* If the link register is being kept alive, with the return address in it,
10975     then make sure that it does not get reused by the ce2 pass.  */
10976  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10977    {
10978      emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10979      cfun->machine->lr_save_eliminated = 1;
10980    }
10981}
10982
/* If CODE is 'd', then X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is 'M', output an ldm/stm style multi-reg.  */
10996void
10997arm_print_operand (FILE *stream, rtx x, int code)
10998{
10999  switch (code)
11000    {
11001    case '@':
11002      fputs (ASM_COMMENT_START, stream);
11003      return;
11004
11005    case '_':
11006      fputs (user_label_prefix, stream);
11007      return;
11008
11009    case '|':
11010      fputs (REGISTER_PREFIX, stream);
11011      return;
11012
11013    case '?':
11014      if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
11015	{
11016	  if (TARGET_THUMB)
11017	    {
11018	      output_operand_lossage ("predicated Thumb instruction");
11019	      break;
11020	    }
11021	  if (current_insn_predicate != NULL)
11022	    {
11023	      output_operand_lossage
11024		("predicated instruction in conditional sequence");
11025	      break;
11026	    }
11027
11028	  fputs (arm_condition_codes[arm_current_cc], stream);
11029	}
11030      else if (current_insn_predicate)
11031	{
11032	  enum arm_cond_code code;
11033
11034	  if (TARGET_THUMB)
11035	    {
11036	      output_operand_lossage ("predicated Thumb instruction");
11037	      break;
11038	    }
11039
11040	  code = get_arm_condition_code (current_insn_predicate);
11041	  fputs (arm_condition_codes[code], stream);
11042	}
11043      return;
11044
11045    case 'N':
11046      {
11047	REAL_VALUE_TYPE r;
11048	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11049	r = REAL_VALUE_NEGATE (r);
11050	fprintf (stream, "%s", fp_const_from_val (&r));
11051      }
11052      return;
11053
11054    case 'B':
11055      if (GET_CODE (x) == CONST_INT)
11056	{
11057	  HOST_WIDE_INT val;
11058	  val = ARM_SIGN_EXTEND (~INTVAL (x));
11059	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
11060	}
11061      else
11062	{
11063	  putc ('~', stream);
11064	  output_addr_const (stream, x);
11065	}
11066      return;
11067
11068    case 'i':
11069      fprintf (stream, "%s", arithmetic_instr (x, 1));
11070      return;
11071
11072    /* Truncate Cirrus shift counts.  */
11073    case 's':
11074      if (GET_CODE (x) == CONST_INT)
11075	{
11076	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
11077	  return;
11078	}
11079      arm_print_operand (stream, x, 0);
11080      return;
11081
11082    case 'I':
11083      fprintf (stream, "%s", arithmetic_instr (x, 0));
11084      return;
11085
11086    case 'S':
11087      {
11088	HOST_WIDE_INT val;
11089	const char *shift;
11090
11091	if (!shift_operator (x, SImode))
11092	  {
11093	    output_operand_lossage ("invalid shift operand");
11094	    break;
11095	  }
11096
11097	shift = shift_op (x, &val);
11098
11099	if (shift)
11100	  {
11101	    fprintf (stream, ", %s ", shift);
11102	    if (val == -1)
11103	      arm_print_operand (stream, XEXP (x, 1), 0);
11104	    else
11105	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
11106	  }
11107      }
11108      return;
11109
11110      /* An explanation of the 'Q', 'R' and 'H' register operands:
11111
11112	 In a pair of registers containing a DI or DF value the 'Q'
11113	 operand returns the register number of the register containing
11114	 the least significant part of the value.  The 'R' operand returns
11115	 the register number of the register containing the most
11116	 significant part of the value.
11117
11118	 The 'H' operand returns the higher of the two register numbers.
	 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
11120	 same as the 'Q' operand, since the most significant part of the
11121	 value is held in the lower number register.  The reverse is true
11122	 on systems where WORDS_BIG_ENDIAN is false.
11123
11124	 The purpose of these operands is to distinguish between cases
11125	 where the endian-ness of the values is important (for example
11126	 when they are added together), and cases where the endian-ness
11127	 is irrelevant, but the order of register operations is important.
11128	 For example when loading a value from memory into a register
11129	 pair, the endian-ness does not matter.  Provided that the value
11130	 from the lower memory address is put into the lower numbered
11131	 register, and the value from the higher address is put into the
11132	 higher numbered register, the load will work regardless of whether
11133	 the value being loaded is big-wordian or little-wordian.  The
11134	 order of the two register loads can matter however, if the address
11135	 of the memory location is actually held in one of the registers
11136	 being overwritten by the load.  */
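      /* For example, on a little-endian target (WORDS_BIG_ENDIAN
	 false) with a DImode value in r0/r1: %Q prints r0 (the least
	 significant half), %R prints r1 (the most significant half)
	 and %H prints r1 (the higher register number), so %R and %H
	 coincide; on a big-wordian target %Q and %H would instead.  */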
11137    case 'Q':
11138      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
11139	{
11140	  output_operand_lossage ("invalid operand for code '%c'", code);
11141	  return;
11142	}
11143
11144      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
11145      return;
11146
11147    case 'R':
11148      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
11149	{
11150	  output_operand_lossage ("invalid operand for code '%c'", code);
11151	  return;
11152	}
11153
11154      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
11155      return;
11156
11157    case 'H':
11158      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
11159	{
11160	  output_operand_lossage ("invalid operand for code '%c'", code);
11161	  return;
11162	}
11163
11164      asm_fprintf (stream, "%r", REGNO (x) + 1);
11165      return;
11166
11167    case 'm':
11168      asm_fprintf (stream, "%r",
11169		   GET_CODE (XEXP (x, 0)) == REG
11170		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
11171      return;
11172
11173    case 'M':
11174      asm_fprintf (stream, "{%r-%r}",
11175		   REGNO (x),
11176		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
11177      return;
11178
11179    case 'd':
11180      /* CONST_TRUE_RTX means always -- that's the default.  */
11181      if (x == const_true_rtx)
11182	return;
11183
11184      if (!COMPARISON_P (x))
11185	{
11186	  output_operand_lossage ("invalid operand for code '%c'", code);
11187	  return;
11188	}
11189
11190      fputs (arm_condition_codes[get_arm_condition_code (x)],
11191	     stream);
11192      return;
11193
11194    case 'D':
11195      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
11196	 want to do that.  */
11197      if (x == const_true_rtx)
11198	{
11199	  output_operand_lossage ("instruction never exectued");
11200	  return;
11201	}
11202      if (!COMPARISON_P (x))
11203	{
11204	  output_operand_lossage ("invalid operand for code '%c'", code);
11205	  return;
11206	}
11207
11208      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
11209				 (get_arm_condition_code (x))],
11210	     stream);
11211      return;
11212
11213    /* Cirrus registers can be accessed in a variety of ways:
11214         single floating point (f)
11215	 double floating point (d)
11216	 32bit integer         (fx)
11217	 64bit integer         (dx).  */
11218    case 'W':			/* Cirrus register in F mode.  */
11219    case 'X':			/* Cirrus register in D mode.  */
11220    case 'Y':			/* Cirrus register in FX mode.  */
11221    case 'Z':			/* Cirrus register in DX mode.  */
11222      gcc_assert (GET_CODE (x) == REG
11223		  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
11224
11225      fprintf (stream, "mv%s%s",
11226	       code == 'W' ? "f"
11227	       : code == 'X' ? "d"
11228	       : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
11229
11230      return;
11231
11232    /* Print cirrus register in the mode specified by the register's mode.  */
11233    case 'V':
11234      {
11235	int mode = GET_MODE (x);
11236
11237	if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
11238	  {
11239	    output_operand_lossage ("invalid operand for code '%c'", code);
11240	    return;
11241	  }
11242
11243	fprintf (stream, "mv%s%s",
11244		 mode == DFmode ? "d"
11245		 : mode == SImode ? "fx"
11246		 : mode == DImode ? "dx"
11247		 : "f", reg_names[REGNO (x)] + 2);
11248
11249	return;
11250      }
11251
11252    case 'U':
11253      if (GET_CODE (x) != REG
11254	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
11255	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
11256	/* Bad value for wCG register number.  */
11257	{
11258	  output_operand_lossage ("invalid operand for code '%c'", code);
11259	  return;
11260	}
11261
11262      else
11263	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
11264      return;
11265
11266      /* Print an iWMMXt control register name.  */
11267    case 'w':
11268      if (GET_CODE (x) != CONST_INT
11269	  || INTVAL (x) < 0
11270	  || INTVAL (x) >= 16)
11271	/* Bad value for wC register number.  */
11272	{
11273	  output_operand_lossage ("invalid operand for code '%c'", code);
11274	  return;
11275	}
11276
11277      else
11278	{
11279	  static const char * wc_reg_names [16] =
11280	    {
11281	      "wCID",  "wCon",  "wCSSF", "wCASF",
11282	      "wC4",   "wC5",   "wC6",   "wC7",
11283	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
11284	      "wC12",  "wC13",  "wC14",  "wC15"
11285	    };
11286
	  fputs (wc_reg_names [INTVAL (x)], stream);
11288	}
11289      return;
11290
11291      /* Print a VFP double precision register name.  */
11292    case 'P':
11293      {
11294	int mode = GET_MODE (x);
11295	int num;
11296
11297	if (mode != DImode && mode != DFmode)
11298	  {
11299	    output_operand_lossage ("invalid operand for code '%c'", code);
11300	    return;
11301	  }
11302
11303	if (GET_CODE (x) != REG
11304	    || !IS_VFP_REGNUM (REGNO (x)))
11305	  {
11306	    output_operand_lossage ("invalid operand for code '%c'", code);
11307	    return;
11308	  }
11309
	num = REGNO (x) - FIRST_VFP_REGNUM;
11311	if (num & 1)
11312	  {
11313	    output_operand_lossage ("invalid operand for code '%c'", code);
11314	    return;
11315	  }
11316
11317	fprintf (stream, "d%d", num >> 1);
11318      }
11319      return;
11320
11321    default:
11322      if (x == 0)
11323	{
11324	  output_operand_lossage ("missing operand");
11325	  return;
11326	}
11327
11328      switch (GET_CODE (x))
11329	{
11330	case REG:
11331	  asm_fprintf (stream, "%r", REGNO (x));
11332	  break;
11333
11334	case MEM:
11335	  output_memory_reference_mode = GET_MODE (x);
11336	  output_address (XEXP (x, 0));
11337	  break;
11338
11339	case CONST_DOUBLE:
11340	  fprintf (stream, "#%s", fp_immediate_constant (x));
11341	  break;
11342
11343	default:
11344	  gcc_assert (GET_CODE (x) != NEG);
11345	  fputc ('#', stream);
11346	  output_addr_const (stream, x);
11347	  break;
11348	}
11349    }
11350}
11351
11352#ifndef AOF_ASSEMBLER
11353/* Target hook for assembling integer objects.  The ARM version needs to
11354   handle word-sized values specially.  */
11355static bool
11356arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
11357{
11358  if (size == UNITS_PER_WORD && aligned_p)
11359    {
11360      fputs ("\t.word\t", asm_out_file);
11361      output_addr_const (asm_out_file, x);
11362
11363      /* Mark symbols as position independent.  We only do this in the
11364	 .text segment, not in the .data segment.  */
11365      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
11366	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
11367	{
11368	  if (GET_CODE (x) == SYMBOL_REF
11369	      && (CONSTANT_POOL_ADDRESS_P (x)
11370		  || SYMBOL_REF_LOCAL_P (x)))
11371	    fputs ("(GOTOFF)", asm_out_file);
11372	  else if (GET_CODE (x) == LABEL_REF)
11373	    fputs ("(GOTOFF)", asm_out_file);
11374	  else
11375	    fputs ("(GOT)", asm_out_file);
11376	}
11377      fputc ('\n', asm_out_file);
11378      return true;
11379    }
11380
11381  if (arm_vector_mode_supported_p (GET_MODE (x)))
11382    {
11383      int i, units;
11384
11385      gcc_assert (GET_CODE (x) == CONST_VECTOR);
11386
11387      units = CONST_VECTOR_NUNITS (x);
11388
11389      switch (GET_MODE (x))
11390	{
11391	case V2SImode: size = 4; break;
11392	case V4HImode: size = 2; break;
11393	case V8QImode: size = 1; break;
11394	default:
11395	  gcc_unreachable ();
11396	}
11397
11398      for (i = 0; i < units; i++)
11399	{
11400	  rtx elt;
11401
11402	  elt = CONST_VECTOR_ELT (x, i);
11403	  assemble_integer
11404	    (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
11405	}
11406
11407      return true;
11408    }
11409
11410  return default_assemble_integer (x, size, aligned_p);
11411}
11412
11413
11414/* Add a function to the list of static constructors.  */
11415
11416static void
11417arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
11418{
11419  if (!TARGET_AAPCS_BASED)
11420    {
11421      default_named_section_asm_out_constructor (symbol, priority);
11422      return;
11423    }
11424
11425  /* Put these in the .init_array section, using a special relocation.  */
11426  switch_to_section (ctors_section);
11427  assemble_align (POINTER_SIZE);
11428  fputs ("\t.word\t", asm_out_file);
11429  output_addr_const (asm_out_file, symbol);
11430  fputs ("(target1)\n", asm_out_file);
11431}
11432#endif
11433
11434/* A finite state machine takes care of noticing whether or not instructions
11435   can be conditionally executed, and thus decrease execution time and code
11436   size by deleting branch instructions.  The fsm is controlled by
11437   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
11438
/* The states of the fsm controlling condition codes are:
11440   0: normal, do nothing special
11441   1: make ASM_OUTPUT_OPCODE not output this instruction
11442   2: make ASM_OUTPUT_OPCODE not output this instruction
11443   3: make instructions conditional
11444   4: make instructions conditional
11445
11446   State transitions (state->state by whom under condition):
11447   0 -> 1 final_prescan_insn if the `target' is a label
11448   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
11449   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
11450   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
11451   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
11452          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
11453   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
11454          (the target insn is arm_target_insn).
11455
11456   If the jump clobbers the conditions then we use states 2 and 4.
11457
11458   A similar thing can be done with conditional return insns.
11459
11460   XXX In case the `target' is an unconditional branch, this conditionalising
11461   of the instructions always reduces code size, but not always execution
11462   time.  But then, I want to reduce the code size to somewhere near what
11463   /bin/cc produces.  */
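
/* As an illustration (hypothetical code): a sequence such as

	cmp	r0, #0
	bne	.L2
	add	r1, r1, #1
     .L2:

   is rewritten by the fsm as

	cmp	r0, #0
	addeq	r1, r1, #1

   deleting the branch at the cost of conditionalizing the skipped
   instruction.  */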
11464
11465/* Returns the index of the ARM condition code string in
11466   `arm_condition_codes'.  COMPARISON should be an rtx like
11467   `(eq (...) (...))'.  */
11468static enum arm_cond_code
11469get_arm_condition_code (rtx comparison)
11470{
11471  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
11472  int code;
11473  enum rtx_code comp_code = GET_CODE (comparison);
11474
11475  if (GET_MODE_CLASS (mode) != MODE_CC)
11476    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
11477			   XEXP (comparison, 1));
11478
11479  switch (mode)
11480    {
11481    case CC_DNEmode: code = ARM_NE; goto dominance;
11482    case CC_DEQmode: code = ARM_EQ; goto dominance;
11483    case CC_DGEmode: code = ARM_GE; goto dominance;
11484    case CC_DGTmode: code = ARM_GT; goto dominance;
11485    case CC_DLEmode: code = ARM_LE; goto dominance;
11486    case CC_DLTmode: code = ARM_LT; goto dominance;
11487    case CC_DGEUmode: code = ARM_CS; goto dominance;
11488    case CC_DGTUmode: code = ARM_HI; goto dominance;
11489    case CC_DLEUmode: code = ARM_LS; goto dominance;
11490    case CC_DLTUmode: code = ARM_CC;
11491
11492    dominance:
11493      gcc_assert (comp_code == EQ || comp_code == NE);
11494
11495      if (comp_code == EQ)
11496	return ARM_INVERSE_CONDITION_CODE (code);
11497      return code;
11498
11499    case CC_NOOVmode:
11500      switch (comp_code)
11501	{
11502	case NE: return ARM_NE;
11503	case EQ: return ARM_EQ;
11504	case GE: return ARM_PL;
11505	case LT: return ARM_MI;
11506	default: gcc_unreachable ();
11507	}
11508
11509    case CC_Zmode:
11510      switch (comp_code)
11511	{
11512	case NE: return ARM_NE;
11513	case EQ: return ARM_EQ;
11514	default: gcc_unreachable ();
11515	}
11516
11517    case CC_Nmode:
11518      switch (comp_code)
11519	{
11520	case NE: return ARM_MI;
11521	case EQ: return ARM_PL;
11522	default: gcc_unreachable ();
11523	}
11524
11525    case CCFPEmode:
11526    case CCFPmode:
11527      /* These encodings assume that AC=1 in the FPA system control
11528	 byte.  This allows us to handle all cases except UNEQ and
11529	 LTGT.  */
11530      switch (comp_code)
11531	{
11532	case GE: return ARM_GE;
11533	case GT: return ARM_GT;
11534	case LE: return ARM_LS;
11535	case LT: return ARM_MI;
11536	case NE: return ARM_NE;
11537	case EQ: return ARM_EQ;
11538	case ORDERED: return ARM_VC;
11539	case UNORDERED: return ARM_VS;
11540	case UNLT: return ARM_LT;
11541	case UNLE: return ARM_LE;
11542	case UNGT: return ARM_HI;
11543	case UNGE: return ARM_PL;
11544	  /* UNEQ and LTGT do not have a representation.  */
11545	case UNEQ: /* Fall through.  */
11546	case LTGT: /* Fall through.  */
11547	default: gcc_unreachable ();
11548	}
11549
11550    case CC_SWPmode:
11551      switch (comp_code)
11552	{
11553	case NE: return ARM_NE;
11554	case EQ: return ARM_EQ;
11555	case GE: return ARM_LE;
11556	case GT: return ARM_LT;
11557	case LE: return ARM_GE;
11558	case LT: return ARM_GT;
11559	case GEU: return ARM_LS;
11560	case GTU: return ARM_CC;
11561	case LEU: return ARM_CS;
11562	case LTU: return ARM_HI;
11563	default: gcc_unreachable ();
11564	}
11565
11566    case CC_Cmode:
11567      switch (comp_code)
11568      {
11569      case LTU: return ARM_CS;
11570      case GEU: return ARM_CC;
11571      default: gcc_unreachable ();
11572      }
11573
11574    case CCmode:
11575      switch (comp_code)
11576	{
11577	case NE: return ARM_NE;
11578	case EQ: return ARM_EQ;
11579	case GE: return ARM_GE;
11580	case GT: return ARM_GT;
11581	case LE: return ARM_LE;
11582	case LT: return ARM_LT;
11583	case GEU: return ARM_CS;
11584	case GTU: return ARM_HI;
11585	case LEU: return ARM_LS;
11586	case LTU: return ARM_CC;
11587	default: gcc_unreachable ();
11588	}
11589
11590    default: gcc_unreachable ();
11591    }
11592}
11593
11594void
11595arm_final_prescan_insn (rtx insn)
11596{
11597  /* BODY will hold the body of INSN.  */
11598  rtx body = PATTERN (insn);
11599
  /* This will be 1 if we are trying to repeat the trick (see the
     state-3 handling below), and things need to be reversed if that
     attempt appears to fail.  */
11602  int reverse = 0;
11603
  /* JUMP_CLOBBERS will be one if the condition codes are clobbered when
     the branch is taken, even if the rtl suggests otherwise.  It also
11606     means that we have to grub around within the jump expression to find
11607     out what the conditions are when the jump isn't taken.  */
11608  int jump_clobbers = 0;
11609
11610  /* If we start with a return insn, we only succeed if we find another one.  */
11611  int seeking_return = 0;
11612
11613  /* START_INSN will hold the insn from where we start looking.  This is the
11614     first insn after the following code_label if REVERSE is true.  */
11615  rtx start_insn = insn;
11616
11617  /* If in state 4, check if the target branch is reached, in order to
11618     change back to state 0.  */
11619  if (arm_ccfsm_state == 4)
11620    {
11621      if (insn == arm_target_insn)
11622	{
11623	  arm_target_insn = NULL;
11624	  arm_ccfsm_state = 0;
11625	}
11626      return;
11627    }
11628
11629  /* If in state 3, it is possible to repeat the trick, if this insn is an
11630     unconditional branch to a label, and immediately following this branch
11631     is the previous target label which is only used once, and the label this
11632     branch jumps to is not too far off.  */
11633  if (arm_ccfsm_state == 3)
11634    {
11635      if (simplejump_p (insn))
11636	{
11637	  start_insn = next_nonnote_insn (start_insn);
11638	  if (GET_CODE (start_insn) == BARRIER)
11639	    {
11640	      /* XXX Isn't this always a barrier?  */
11641	      start_insn = next_nonnote_insn (start_insn);
11642	    }
11643	  if (GET_CODE (start_insn) == CODE_LABEL
11644	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11645	      && LABEL_NUSES (start_insn) == 1)
11646	    reverse = TRUE;
11647	  else
11648	    return;
11649	}
11650      else if (GET_CODE (body) == RETURN)
11651        {
11652	  start_insn = next_nonnote_insn (start_insn);
11653	  if (GET_CODE (start_insn) == BARRIER)
11654	    start_insn = next_nonnote_insn (start_insn);
11655	  if (GET_CODE (start_insn) == CODE_LABEL
11656	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11657	      && LABEL_NUSES (start_insn) == 1)
11658	    {
11659	      reverse = TRUE;
11660	      seeking_return = 1;
11661	    }
11662	  else
11663	    return;
11664        }
11665      else
11666	return;
11667    }
11668
11669  gcc_assert (!arm_ccfsm_state || reverse);
11670  if (GET_CODE (insn) != JUMP_INSN)
11671    return;
11672
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
11675  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11676    body = XVECEXP (body, 0, 0);
11677
11678  if (reverse
11679      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11680	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11681    {
11682      int insns_skipped;
11683      int fail = FALSE, succeed = FALSE;
11684      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
11685      int then_not_else = TRUE;
11686      rtx this_insn = start_insn, label = 0;
11687
11688      /* If the jump cannot be done with one instruction, we cannot
11689	 conditionally execute the instruction in the inverse case.  */
11690      if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11691	{
11692	  jump_clobbers = 1;
11693	  return;
11694	}
11695
11696      /* Register the insn jumped to.  */
11697      if (reverse)
11698        {
11699	  if (!seeking_return)
11700	    label = XEXP (SET_SRC (body), 0);
11701        }
11702      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11703	label = XEXP (XEXP (SET_SRC (body), 1), 0);
11704      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11705	{
11706	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
11707	  then_not_else = FALSE;
11708	}
11709      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11710	seeking_return = 1;
11711      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11712        {
11713	  seeking_return = 1;
11714	  then_not_else = FALSE;
11715        }
11716      else
11717	gcc_unreachable ();
11718
11719      /* See how many insns this branch skips, and what kind of insns.  If all
11720	 insns are okay, and the label or unconditional branch to the same
11721	 label is not too far away, succeed.  */
11722      for (insns_skipped = 0;
11723	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11724	{
11725	  rtx scanbody;
11726
11727	  this_insn = next_nonnote_insn (this_insn);
11728	  if (!this_insn)
11729	    break;
11730
11731	  switch (GET_CODE (this_insn))
11732	    {
11733	    case CODE_LABEL:
11734	      /* Succeed if it is the target label, otherwise fail since
11735		 control falls in from somewhere else.  */
11736	      if (this_insn == label)
11737		{
11738		  if (jump_clobbers)
11739		    {
11740		      arm_ccfsm_state = 2;
11741		      this_insn = next_nonnote_insn (this_insn);
11742		    }
11743		  else
11744		    arm_ccfsm_state = 1;
11745		  succeed = TRUE;
11746		}
11747	      else
11748		fail = TRUE;
11749	      break;
11750
11751	    case BARRIER:
11752	      /* Succeed if the following insn is the target label.
11753		 Otherwise fail.
11754		 If return insns are used then the last insn in a function
11755		 will be a barrier.  */
11756	      this_insn = next_nonnote_insn (this_insn);
11757	      if (this_insn && this_insn == label)
11758		{
11759		  if (jump_clobbers)
11760		    {
11761		      arm_ccfsm_state = 2;
11762		      this_insn = next_nonnote_insn (this_insn);
11763		    }
11764		  else
11765		    arm_ccfsm_state = 1;
11766		  succeed = TRUE;
11767		}
11768	      else
11769		fail = TRUE;
11770	      break;
11771
11772	    case CALL_INSN:
11773	      /* The AAPCS says that conditional calls should not be
11774		 used since they make interworking inefficient (the
11775		 linker can't transform BL<cond> into BLX).  That's
11776		 only a problem if the machine has BLX.  */
11777	      if (arm_arch5)
11778		{
11779		  fail = TRUE;
11780		  break;
11781		}
11782
11783	      /* Succeed if the following insn is the target label, or
11784		 if the following two insns are a barrier and the
11785		 target label.  */
11786	      this_insn = next_nonnote_insn (this_insn);
11787	      if (this_insn && GET_CODE (this_insn) == BARRIER)
11788		this_insn = next_nonnote_insn (this_insn);
11789
11790	      if (this_insn && this_insn == label
11791		  && insns_skipped < max_insns_skipped)
11792		{
11793		  if (jump_clobbers)
11794		    {
11795		      arm_ccfsm_state = 2;
11796		      this_insn = next_nonnote_insn (this_insn);
11797		    }
11798		  else
11799		    arm_ccfsm_state = 1;
11800		  succeed = TRUE;
11801		}
11802	      else
11803		fail = TRUE;
11804	      break;
11805
11806	    case JUMP_INSN:
11807      	      /* If this is an unconditional branch to the same label, succeed.
11808		 If it is to another label, do nothing.  If it is conditional,
11809		 fail.  */
11810	      /* XXX Probably, the tests for SET and the PC are
11811		 unnecessary.  */
11812
11813	      scanbody = PATTERN (this_insn);
11814	      if (GET_CODE (scanbody) == SET
11815		  && GET_CODE (SET_DEST (scanbody)) == PC)
11816		{
11817		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11818		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11819		    {
11820		      arm_ccfsm_state = 2;
11821		      succeed = TRUE;
11822		    }
11823		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11824		    fail = TRUE;
11825		}
11826	      /* Fail if a conditional return is undesirable (e.g. on a
11827		 StrongARM), but still allow this if optimizing for size.  */
11828	      else if (GET_CODE (scanbody) == RETURN
11829		       && !use_return_insn (TRUE, NULL)
11830		       && !optimize_size)
11831		fail = TRUE;
11832	      else if (GET_CODE (scanbody) == RETURN
11833		       && seeking_return)
11834	        {
11835		  arm_ccfsm_state = 2;
11836		  succeed = TRUE;
11837	        }
11838	      else if (GET_CODE (scanbody) == PARALLEL)
11839	        {
11840		  switch (get_attr_conds (this_insn))
11841		    {
11842		    case CONDS_NOCOND:
11843		      break;
11844		    default:
11845		      fail = TRUE;
11846		      break;
11847		    }
11848		}
11849	      else
11850		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
11851
11852	      break;
11853
11854	    case INSN:
11855	      /* Instructions using or affecting the condition codes make it
11856		 fail.  */
11857	      scanbody = PATTERN (this_insn);
11858	      if (!(GET_CODE (scanbody) == SET
11859		    || GET_CODE (scanbody) == PARALLEL)
11860		  || get_attr_conds (this_insn) != CONDS_NOCOND)
11861		fail = TRUE;
11862
	      /* A conditional Cirrus instruction must be followed by
		 a non-Cirrus instruction.  However, by the time we
		 get here we cannot add instructions (nops), because
		 shorten_branches() has already been called, so to be
		 safe we simply refuse to conditionalize Cirrus
		 instructions.  */
11870	      if (GET_CODE (scanbody) != USE
11871		  && GET_CODE (scanbody) != CLOBBER
11872		  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11873		fail = TRUE;
11874	      break;
11875
11876	    default:
11877	      break;
11878	    }
11879	}
11880      if (succeed)
11881	{
11882	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11883	    arm_target_label = CODE_LABEL_NUMBER (label);
11884	  else
11885	    {
11886	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
11887
11888	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11889	        {
11890		  this_insn = next_nonnote_insn (this_insn);
11891		  gcc_assert (!this_insn
11892			      || (GET_CODE (this_insn) != BARRIER
11893				  && GET_CODE (this_insn) != CODE_LABEL));
11894	        }
11895	      if (!this_insn)
11896	        {
		  /* Oh dear!  We ran off the end... give up.  */
11898		  recog (PATTERN (insn), insn, NULL);
11899		  arm_ccfsm_state = 0;
11900		  arm_target_insn = NULL;
11901		  return;
11902	        }
11903	      arm_target_insn = this_insn;
11904	    }
11905	  if (jump_clobbers)
11906	    {
11907	      gcc_assert (!reverse);
11908	      arm_current_cc =
11909		  get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11910							    0), 0), 1));
11911	      if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11912		arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11913	      if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11914		arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11915	    }
11916	  else
11917	    {
11918	      /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11919		 what it was.  */
11920	      if (!reverse)
11921		arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11922							       0));
11923	    }
11924
11925	  if (reverse || then_not_else)
11926	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11927	}
11928
11929      /* Restore recog_data (getting the attributes of other insns can
11930	 destroy this array, but final.c assumes that it remains intact
11931	 across this call; since the insn has been recognized already we
11932	 call recog direct).  */
11933      recog (PATTERN (insn), insn, NULL);
11934    }
11935}
11936
11937/* Returns true if REGNO is a valid register
11938   for holding a quantity of type MODE.  */
11939int
11940arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11941{
11942  if (GET_MODE_CLASS (mode) == MODE_CC)
11943    return (regno == CC_REGNUM
11944	    || (TARGET_HARD_FLOAT && TARGET_VFP
11945		&& regno == VFPCC_REGNUM));
11946
11947  if (TARGET_THUMB)
11948    /* For the Thumb we only allow values bigger than SImode in
11949       registers 0 - 6, so that there is always a second low
11950       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
11952       start of an even numbered register pair.  */
11953    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11954
11955  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
11956      && IS_CIRRUS_REGNUM (regno))
11957    /* We have outlawed SI values in Cirrus registers because they
11958       reside in the lower 32 bits, but SF values reside in the
11959       upper 32 bits.  This causes gcc all sorts of grief.  We can't
11960       even split the registers into pairs because Cirrus SI values
       get sign extended to 64 bits -- aldyh.  */
11962    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11963
11964  if (TARGET_HARD_FLOAT && TARGET_VFP
11965      && IS_VFP_REGNUM (regno))
11966    {
11967      if (mode == SFmode || mode == SImode)
11968	return TRUE;
11969
11970      /* DFmode values are only valid in even register pairs.  */
11971      if (mode == DFmode)
11972	return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11973      return FALSE;
11974    }
11975
11976  if (TARGET_REALLY_IWMMXT)
11977    {
11978      if (IS_IWMMXT_GR_REGNUM (regno))
11979	return mode == SImode;
11980
11981      if (IS_IWMMXT_REGNUM (regno))
11982	return VALID_IWMMXT_REG_MODE (mode);
11983    }
11984
11985  /* We allow any value to be stored in the general registers.
11986     Restrict doubleword quantities to even register pairs so that we can
11987     use ldrd.  */
11988  if (regno <= LAST_ARM_REGNUM)
11989    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11990
11991  if (regno == FRAME_POINTER_REGNUM
11992      || regno == ARG_POINTER_REGNUM)
11993    /* We only allow integers in the fake hard registers.  */
11994    return GET_MODE_CLASS (mode) == MODE_INT;
11995
11996  /* The only registers left are the FPA registers
11997     which we only allow to hold FP values.  */
11998  return (TARGET_HARD_FLOAT && TARGET_FPA
11999	  && GET_MODE_CLASS (mode) == MODE_FLOAT
12000	  && regno >= FIRST_FPA_REGNUM
12001	  && regno <= LAST_FPA_REGNUM);
12002}
12003
12004int
12005arm_regno_class (int regno)
12006{
12007  if (TARGET_THUMB)
12008    {
12009      if (regno == STACK_POINTER_REGNUM)
12010	return STACK_REG;
12011      if (regno == CC_REGNUM)
12012	return CC_REG;
12013      if (regno < 8)
12014	return LO_REGS;
12015      return HI_REGS;
12016    }
12017
12018  if (   regno <= LAST_ARM_REGNUM
12019      || regno == FRAME_POINTER_REGNUM
12020      || regno == ARG_POINTER_REGNUM)
12021    return GENERAL_REGS;
12022
12023  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
12024    return NO_REGS;
12025
12026  if (IS_CIRRUS_REGNUM (regno))
12027    return CIRRUS_REGS;
12028
12029  if (IS_VFP_REGNUM (regno))
12030    return VFP_REGS;
12031
12032  if (IS_IWMMXT_REGNUM (regno))
12033    return IWMMXT_REGS;
12034
12035  if (IS_IWMMXT_GR_REGNUM (regno))
12036    return IWMMXT_GR_REGS;
12037
12038  return FPA_REGS;
12039}
12040
12041/* Handle a special case when computing the offset
12042   of an argument from the frame pointer.  */
12043int
12044arm_debugger_arg_offset (int value, rtx addr)
12045{
12046  rtx insn;
12047
12048  /* We are only interested if dbxout_parms() failed to compute the offset.  */
12049  if (value != 0)
12050    return 0;
12051
12052  /* We can only cope with the case where the address is held in a register.  */
12053  if (GET_CODE (addr) != REG)
12054    return 0;
12055
12056  /* If we are using the frame pointer to point at the argument, then
12057     an offset of 0 is correct.  */
12058  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
12059    return 0;
12060
12061  /* If we are using the stack pointer to point at the
12062     argument, then an offset of 0 is correct.  */
12063  if ((TARGET_THUMB || !frame_pointer_needed)
12064      && REGNO (addr) == SP_REGNUM)
12065    return 0;
12066
12067  /* Oh dear.  The argument is pointed to by a register rather
12068     than being held in a register, or being stored at a known
12069     offset from the frame pointer.  Since GDB only understands
12070     those two kinds of argument we must translate the address
12071     held in the register into an offset from the frame pointer.
12072     We do this by searching through the insns for the function
12073     looking to see where this register gets its value.  If the
12074     register is initialized from the frame pointer plus an offset
12075     then we are in luck and we can continue, otherwise we give up.
12076
12077     This code is exercised by producing debugging information
12078     for a function with arguments like this:
12079
12080           double func (double a, double b, int c, double d) {return d;}
12081
12082     Without this code the stab for parameter 'd' will be set to
12083     an offset of 0 from the frame pointer, rather than 8.  */
12084
12085  /* The if() statement says:
12086
12087     If the insn is a normal instruction
12088     and if the insn is setting the value in a register
12089     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
12091     that involves adding to a register
12092     which is the frame pointer
12093     a constant integer
12094
12095     then...  */
12096
12097  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12098    {
12099      if (   GET_CODE (insn) == INSN
12100	  && GET_CODE (PATTERN (insn)) == SET
12101	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
12102	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
12103	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
12104	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
12105	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
12106	     )
12107	{
12108	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
12109
12110	  break;
12111	}
12112    }
12113
12114  if (value == 0)
12115    {
12116      debug_rtx (addr);
12117      warning (0, "unable to compute real location of stacked parameter");
12118      value = 8; /* XXX magic hack */
12119    }
12120
12121  return value;
12122}
12123
12124#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
12125  do									\
12126    {									\
12127      if ((MASK) & insn_flags)						\
12128        lang_hooks.builtin_function ((NAME), (TYPE), (CODE),		\
12129				     BUILT_IN_MD, NULL, NULL_TREE);	\
12130    }									\
12131  while (0)
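
/* A typical (illustrative) use of the macro above:

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
		   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   which registers the builtin only when FL_IWMMXT is present in
   insn_flags.  The type-node name follows the convention used in
   arm_init_iwmmxt_builtins below.  */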
12132
12133struct builtin_description
12134{
12135  const unsigned int       mask;
12136  const enum insn_code     icode;
12137  const char * const       name;
12138  const enum arm_builtins  code;
12139  const enum rtx_code      comparison;
12140  const unsigned int       flag;
12141};
12142
12143static const struct builtin_description bdesc_2arg[] =
12144{
12145#define IWMMXT_BUILTIN(code, string, builtin) \
12146  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
12147    ARM_BUILTIN_##builtin, 0, 0 },
12148
12149  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
12150  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
12151  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
12152  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)

#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },

  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
  IWMMXT_BUILTIN2 (ashlv4hi3,       WSLLHI)
  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
  IWMMXT_BUILTIN2 (ashlv2si3,       WSLLWI)
  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
  IWMMXT_BUILTIN2 (lshrv4hi3,       WSRLHI)
  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
  IWMMXT_BUILTIN2 (lshrv2si3,       WSRLWI)
  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
  IWMMXT_BUILTIN2 (ashrv4hi3,       WSRAHI)
  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
  IWMMXT_BUILTIN2 (ashrv2si3,       WSRAWI)
  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
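
/* Example (illustrative only, not part of the compiler): every named
   entry in the tables above is registered under a "__builtin_arm_"
   prefix, so with iWMMXt enabled (e.g. -mcpu=iwmmxt) a user can write

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi sub_halfwords (v4hi a, v4hi b)
     {
       return __builtin_arm_wsubh (a, b);
     }

   arm_expand_builtin below finds the function code in bdesc_2arg and
   expands the call through arm_expand_binop_builtin using
   CODE_FOR_subv4hi3.  */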

/* Set up all the iWMMXt builtins.  This is
   not called if TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree int_ftype_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, long_long_integer_type_node,
						 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));

  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE,
				      long_long_unsigned_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 tree_cons (NULL_TREE,
							    V4HI_type_node,
							    endlink))));

  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}

static void
arm_init_tls_builtins (void)
{
  tree ftype;
  tree nothrow = tree_cons (get_identifier ("nothrow"), NULL, NULL);
  tree const_nothrow = tree_cons (get_identifier ("const"), NULL, nothrow);

  ftype = build_function_type (ptr_type_node, void_list_node);
  lang_hooks.builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, const_nothrow);
}
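
/* For example (a sketch; requires a target with TLS support), the
   builtin registered above lets user code read the thread pointer
   directly:

     void *tp = __builtin_thread_pointer ();

   It expands through arm_load_tp; see the ARM_BUILTIN_THREAD_POINTER
   case in arm_expand_builtin below.  */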

static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}

/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree arglist, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code    icode;
  tree              fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree              arglist = TREE_OPERAND (exp, 1);
  tree              arg0;
  tree              arg1;
  tree              arg2;
  rtx               op0;
  rtx               op1;
  rtx               op2;
  rtx               pat;
  int               fcode = DECL_FUNCTION_CODE (fndecl);
  size_t            i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_SETWCX:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);

      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : CODE_FOR_iwmmxt_walign);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, arglist, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, arglist, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
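
/* Worked example (illustrative): the TEXTRM cases above insist that the
   selector operand satisfies the insn's immediate predicate, so

     typedef char v8qi __attribute__ ((vector_size (8)));

     int get_byte (v8qi v)
     {
       return __builtin_arm_textrmsb (v, 2);
     }

   expands to a single extract instruction, while passing a variable
   instead of the constant 2 reaches the "selector must be an immediate"
   diagnostic.  */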

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

inline static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
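
/* For example, number_of_first_bit_set (0x18) returns 3, since bit 3 is
   the lowest bit set in binary 11000.  Callers must pass a nonzero MASK;
   the loop above does not terminate for zero.  */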

/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to push or pop.  PUSH is
   nonzero if we should push, and zero if we should pop.  For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack.  REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers.  */
static void
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
	       unsigned long real_regs)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here; do it in
	 thumb_exit ().  */
      thumb_exit (f, -1);
      return;
    }

  if (ARM_EABI_UNWIND_TABLES && push)
    {
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
	{
	  if (real_regs & (1 << regno))
	    {
	      if (real_regs & ((1 << regno) - 1))
		fprintf (f, ", ");
	      asm_fprintf (f, "%r", regno);
	    }
	}
      fprintf (f, "}\n");
    }

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (push && (mask & (1 << LR_REGNUM)))
    {
      /* Catch pushing the LR.  */
      if (mask & 0xFF)
	fprintf (f, ", ");

      asm_fprintf (f, "%r", LR_REGNUM);

      pushed_words++;
    }
  else if (!push && (mask & (1 << PC_REGNUM)))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || current_function_calls_eh_return)
	{
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label ();
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
	{
	  if (pushed_mask & 1)
	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
	}
    }
}
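
/* As an example (a sketch of typical output), a call such as

     thumb_pushpop (f, (1 << 4) | (1 << 5) | (1 << LR_REGNUM), 1,
		    &offset, (1 << 4) | (1 << 5) | (1 << LR_REGNUM));

   emits "push {r4, r5, lr}", plus a matching .save directive when EABI
   unwind tables are enabled and CFI notes when DWARF frame output is
   requested.  */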

/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (current_function_calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise, if we are not supporting interworking, have not created
     a backtrace structure and the function was not entered in ARM mode,
     then just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !current_function_calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (current_function_calls_eh_return)
    size = 12;
  else
    {
      /* We can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 regs_ever_live[] because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (current_function_return_rtx != 0)
	mode = GET_MODE (current_function_return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    --pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;
      int  move_to;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (current_function_calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
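
/* Illustrative epilogue fragment: for an interworking function that must
   not pop directly into the PC, the code above typically produces
   something like

	pop	{r1}
	bx	r1

   where the scratch register chosen depends on which argument registers
   the return value leaves free.  */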


void
thumb_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
}

int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
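
/* For example, thumb_shiftable_const accepts 0x00FF0000 (0xff << 16)
   and 0x000003FC (0xff << 2), but rejects 0x00000101 because its set
   bits do not fit within a single 8-bit window.  */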

/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (regs_ever_live [ARG_POINTER_REGNUM])
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == JUMP_INSN
	  /* Ignore tablejump patterns.  */
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	  && get_attr_far_jump (insn) == FAR_JUMP_YES
	  )
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}

/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}

/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb_unexpanded_epilogue (void)
{
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;
  int size;

  if (return_used_this_function)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  live_regs_mask = thumb_compute_save_reg_mask ();
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining regs_ever_live[] because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  /* The prologue may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prologue.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
           high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
             be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
			 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   current_function_pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
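
/* Illustrative fragment: the epilogue matching the prologue example in
   the comment above (r8 and r9 saved through r6 and r7) would undo the
   extra push with something like

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3

   before the remaining low registers and the return address are popped.  */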

/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
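
/* This supports __builtin_return_address for the current frame only; for
   example

     void *ra = __builtin_return_address (0);

   is expanded via the LR pseudo above, while a nonzero count yields
   NULL_RTX here and the builtin then evaluates to a null pointer.  */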

/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}


/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
13652
13653HOST_WIDE_INT
13654thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
13655{
13656  arm_stack_offsets *offsets;
13657
13658  offsets = arm_get_frame_offsets ();
13659
13660  switch (from)
13661    {
13662    case ARG_POINTER_REGNUM:
13663      switch (to)
13664	{
13665	case STACK_POINTER_REGNUM:
13666	  return offsets->outgoing_args - offsets->saved_args;
13667
13668	case FRAME_POINTER_REGNUM:
13669	  return offsets->soft_frame - offsets->saved_args;
13670
13671	case ARM_HARD_FRAME_POINTER_REGNUM:
13672	  return offsets->saved_regs - offsets->saved_args;
13673
13674	case THUMB_HARD_FRAME_POINTER_REGNUM:
13675	  return offsets->locals_base - offsets->saved_args;
13676
13677	default:
13678	  gcc_unreachable ();
13679	}
13680      break;
13681
13682    case FRAME_POINTER_REGNUM:
13683      switch (to)
13684	{
13685	case STACK_POINTER_REGNUM:
13686	  return offsets->outgoing_args - offsets->soft_frame;
13687
13688	case ARM_HARD_FRAME_POINTER_REGNUM:
13689	  return offsets->saved_regs - offsets->soft_frame;
13690
13691	case THUMB_HARD_FRAME_POINTER_REGNUM:
13692	  return offsets->locals_base - offsets->soft_frame;
13693
13694	default:
13695	  gcc_unreachable ();
13696	}
13697      break;
13698
13699    default:
13700      gcc_unreachable ();
13701    }
13702}
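
/* A worked example of the arithmetic above, with made-up offsets: if
   saved_args == 0, soft_frame == 24 and outgoing_args == 40, then
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   40 - 0 == 40, while eliminating FRAME_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 40 - 24 == 16.  */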
13703
13704
13705/* Generate the rest of a function's prologue.  */
13706void
13707thumb_expand_prologue (void)
13708{
13709  rtx insn, dwarf;
13710
13711  HOST_WIDE_INT amount;
13712  arm_stack_offsets *offsets;
13713  unsigned long func_type;
13714  int regno;
13715  unsigned long live_regs_mask;
13716
13717  func_type = arm_current_func_type ();
13718
13719  /* Naked functions don't have prologues.  */
13720  if (IS_NAKED (func_type))
13721    return;
13722
13723  if (IS_INTERRUPT (func_type))
13724    {
      error ("interrupt service routines cannot be coded in Thumb mode");
13726      return;
13727    }
13728
13729  live_regs_mask = thumb_compute_save_reg_mask ();
  /* Load the PIC register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
13732  if (flag_pic && arm_pic_register != INVALID_REGNUM)
13733    arm_load_pic_register (live_regs_mask);
13734
13735  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13736    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13737		    stack_pointer_rtx);
13738
13739  offsets = arm_get_frame_offsets ();
13740  amount = offsets->outgoing_args - offsets->saved_regs;
13741  if (amount)
13742    {
13743      if (amount < 512)
13744	{
13745	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13746					GEN_INT (- amount)));
13747	  RTX_FRAME_RELATED_P (insn) = 1;
13748	}
13749      else
13750	{
13751	  rtx reg;
13752
	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load it into a register.  (Also
	     the ARM debugger really likes to see only one stack decrement
	     per function.)  So instead we look for a scratch register into
	     which we can load the decrement, and then we subtract this from
	     the stack pointer.  Unfortunately, on Thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call-preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
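	  /* Illustrative example (not taken from real compiler output):
	     for a 1280-byte frame with r4 live, the code below would
	     typically assemble to something like

		ldr	r4, .Lpool	@ r4 = -1280, from the literal pool
		add	sp, sp, r4	@ the single stack decrement

	     with r4's original value restored by the epilogue's pop.  */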
13767	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13768	    if (live_regs_mask & (1 << regno)
13769		&& !(frame_pointer_needed
13770		     && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13771	      break;
13772
13773	  if (regno > LAST_LO_REGNUM) /* Very unlikely.  */
13774	    {
13775	      rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13776
13777	      /* Choose an arbitrary, non-argument low register.  */
13778	      reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13779
13780	      /* Save it by copying it into a high, scratch register.  */
13781	      emit_insn (gen_movsi (spare, reg));
13782	      /* Add a USE to stop propagate_one_insn() from barfing.  */
13783	      emit_insn (gen_prologue_use (spare));
13784
13785	      /* Decrement the stack.  */
13786	      emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13787	      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13788					    stack_pointer_rtx, reg));
13789	      RTX_FRAME_RELATED_P (insn) = 1;
13790	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13791				   plus_constant (stack_pointer_rtx,
13792						  -amount));
13793	      RTX_FRAME_RELATED_P (dwarf) = 1;
13794	      REG_NOTES (insn)
13795		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13796				     REG_NOTES (insn));
13797
13798	      /* Restore the low register's original value.  */
13799	      emit_insn (gen_movsi (reg, spare));
13800
13801	      /* Emit a USE of the restored scratch register, so that flow
13802		 analysis will not consider the restore redundant.  The
13803		 register won't be used again in this function and isn't
13804		 restored by the epilogue.  */
13805	      emit_insn (gen_prologue_use (reg));
13806	    }
13807	  else
13808	    {
13809	      reg = gen_rtx_REG (SImode, regno);
13810
13811	      emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13812
13813	      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13814					    stack_pointer_rtx, reg));
13815	      RTX_FRAME_RELATED_P (insn) = 1;
13816	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13817				   plus_constant (stack_pointer_rtx,
13818						  -amount));
13819	      RTX_FRAME_RELATED_P (dwarf) = 1;
13820	      REG_NOTES (insn)
13821		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13822				     REG_NOTES (insn));
13823	    }
13824	}
13825    }
13826
13827  if (frame_pointer_needed)
13828    {
13829      amount = offsets->outgoing_args - offsets->locals_base;
13830
13831      if (amount < 1024)
13832	insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13833				      stack_pointer_rtx, GEN_INT (amount)));
13834      else
13835	{
13836	  emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
13837	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13838					hard_frame_pointer_rtx,
13839					stack_pointer_rtx));
13840	  dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
13841			       plus_constant (stack_pointer_rtx, amount));
13842	  RTX_FRAME_RELATED_P (dwarf) = 1;
13843	  REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13844						REG_NOTES (insn));
13845	}
13846
13847      RTX_FRAME_RELATED_P (insn) = 1;
13848    }
13849
  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prologue.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
13855  if (current_function_profile || !TARGET_SCHED_PROLOG
13856      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13857    emit_insn (gen_blockage ());
13858
13859  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13860  if (live_regs_mask & 0xff)
13861    cfun->machine->lr_save_eliminated = 0;
13862
13863  /* If the link register is being kept alive, with the return address in it,
13864     then make sure that it does not get reused by the ce2 pass.  */
13865  if (cfun->machine->lr_save_eliminated)
13866    emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
13867}
13868
13869
13870void
13871thumb_expand_epilogue (void)
13872{
13873  HOST_WIDE_INT amount;
13874  arm_stack_offsets *offsets;
13875  int regno;
13876
  /* Naked functions don't have epilogues.  */
13878  if (IS_NAKED (arm_current_func_type ()))
13879    return;
13880
13881  offsets = arm_get_frame_offsets ();
13882  amount = offsets->outgoing_args - offsets->saved_regs;
13883
13884  if (frame_pointer_needed)
13885    {
13886      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13887      amount = offsets->locals_base - offsets->saved_regs;
13888    }
13889
13890  gcc_assert (amount >= 0);
13891  if (amount)
13892    {
13893      if (amount < 512)
13894	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13895			       GEN_INT (amount)));
13896      else
13897	{
13898	  /* r3 is always free in the epilogue.  */
13899	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13900
13901	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
13902	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13903	}
13904    }
13905
13906  /* Emit a USE (stack_pointer_rtx), so that
13907     the stack adjustment will not be deleted.  */
13908  emit_insn (gen_prologue_use (stack_pointer_rtx));
13909
13910  if (current_function_profile || !TARGET_SCHED_PROLOG)
13911    emit_insn (gen_blockage ());
13912
13913  /* Emit a clobber for each insn that will be restored in the epilogue,
13914     so that flow2 will get register lifetimes correct.  */
13915  for (regno = 0; regno < 13; regno++)
13916    if (regs_ever_live[regno] && !call_used_regs[regno])
13917      emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13918
13919  if (! regs_ever_live[LR_REGNUM])
13920    emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
13921}
13922
13923static void
13924thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13925{
13926  unsigned long live_regs_mask = 0;
13927  unsigned long l_mask;
13928  unsigned high_regs_pushed = 0;
13929  int cfa_offset = 0;
13930  int regno;
13931
13932  if (IS_NAKED (arm_current_func_type ()))
13933    return;
13934
13935  if (is_called_in_ARM_mode (current_function_decl))
13936    {
13937      const char * name;
13938
13939      gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13940      gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13941		  == SYMBOL_REF);
      name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
13943
13944      /* Generate code sequence to switch us into Thumb mode.  */
13945      /* The .code 32 directive has already been emitted by
13946	 ASM_DECLARE_FUNCTION_NAME.  */
13947      asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13948      asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
13949
13950      /* Generate a label, so that the debugger will notice the
13951	 change in instruction sets.  This label is also used by
13952	 the assembler to bypass the ARM code when this function
13953	 is called from a Thumb encoded function elsewhere in the
13954	 same file.  Hence the definition of STUB_NAME here must
13955	 agree with the definition in gas/config/tc-arm.c.  */
13956
13957#define STUB_NAME ".real_start_of"
13958
13959      fprintf (f, "\t.code\t16\n");
13960#ifdef ARM_PE
13961      if (arm_dllexport_name_p (name))
13962        name = arm_strip_name_encoding (name);
13963#endif
13964      asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13965      fprintf (f, "\t.thumb_func\n");
13966      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13967    }
13968
13969  if (current_function_pretend_args_size)
13970    {
13971      /* Output unwind directive for the stack adjustment.  */
13972      if (ARM_EABI_UNWIND_TABLES)
13973	fprintf (f, "\t.pad #%d\n",
13974		 current_function_pretend_args_size);
13975
13976      if (cfun->machine->uses_anonymous_args)
13977	{
13978	  int num_pushes;
13979
13980	  fprintf (f, "\tpush\t{");
13981
13982	  num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13983
13984	  for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13985	       regno <= LAST_ARG_REGNUM;
13986	       regno++)
13987	    asm_fprintf (f, "%r%s", regno,
13988			 regno == LAST_ARG_REGNUM ? "" : ", ");
13989
13990	  fprintf (f, "}\n");
13991	}
13992      else
13993	asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13994		     SP_REGNUM, SP_REGNUM,
13995		     current_function_pretend_args_size);
13996
13997      /* We don't need to record the stores for unwinding (would it
13998	 help the debugger any if we did?), but record the change in
13999	 the stack pointer.  */
14000      if (dwarf2out_do_frame ())
14001	{
14002	  char *l = dwarf2out_cfi_label ();
14003
14004	  cfa_offset = cfa_offset + current_function_pretend_args_size;
14005	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
14006	}
14007    }
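
  /* For instance (an illustrative case), a varargs function with one
     named argument has a pretend_args_size of 12, so the code above
     emits "push {r1, r2, r3}", or "sub sp, sp, #12" when the anonymous
     arguments are never referenced.  */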
14008
14009  /* Get the registers we are going to push.  */
14010  live_regs_mask = thumb_compute_save_reg_mask ();
  /* Extract a mask of the ones we can give to the Thumb push instruction.  */
14012  l_mask = live_regs_mask & 0x40ff;
14013  /* Then count how many other high registers will need to be pushed.  */
14014  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
14015
14016  if (TARGET_BACKTRACE)
14017    {
14018      unsigned offset;
14019      unsigned work_register;
14020
14021      /* We have been asked to create a stack backtrace structure.
14022         The code looks like this:
14023
14024	 0   .align 2
14025	 0   func:
14026         0     sub   SP, #16         Reserve space for 4 registers.
14027	 2     push  {R7}            Push low registers.
14028         4     add   R7, SP, #20     Get the stack pointer before the push.
14029         6     str   R7, [SP, #8]    Store the stack pointer (before reserving the space).
14030         8     mov   R7, PC          Get hold of the start of this code plus 12.
14031        10     str   R7, [SP, #16]   Store it.
14032        12     mov   R7, FP          Get hold of the current frame pointer.
14033        14     str   R7, [SP, #4]    Store it.
14034        16     mov   R7, LR          Get hold of the current return address.
14035        18     str   R7, [SP, #12]   Store it.
14036        20     add   R7, SP, #16     Point at the start of the backtrace structure.
14037        22     mov   FP, R7          Put this value into the frame pointer.  */
14038
14039      work_register = thumb_find_work_register (live_regs_mask);
14040
14041      if (ARM_EABI_UNWIND_TABLES)
14042	asm_fprintf (f, "\t.pad #16\n");
14043
14044      asm_fprintf
14045	(f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
14046	 SP_REGNUM, SP_REGNUM);
14047
14048      if (dwarf2out_do_frame ())
14049	{
14050	  char *l = dwarf2out_cfi_label ();
14051
14052	  cfa_offset = cfa_offset + 16;
14053	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
14054	}
14055
14056      if (l_mask)
14057	{
14058	  thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
14059	  offset = bit_count (l_mask) * UNITS_PER_WORD;
14060	}
14061      else
14062	offset = 0;
14063
14064      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
14065		   offset + 16 + current_function_pretend_args_size);
14066
14067      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14068		   offset + 4);
14069
14070      /* Make sure that the instruction fetching the PC is in the right place
14071	 to calculate "start of backtrace creation code + 12".  */
14072      if (l_mask)
14073	{
14074	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
14075	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14076		       offset + 12);
14077	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
14078		       ARM_HARD_FRAME_POINTER_REGNUM);
14079	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14080		       offset);
14081	}
14082      else
14083	{
14084	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
14085		       ARM_HARD_FRAME_POINTER_REGNUM);
14086	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14087		       offset);
14088	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
14089	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14090		       offset + 12);
14091	}
14092
14093      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
14094      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14095		   offset + 8);
14096      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
14097		   offset + 12);
14098      asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
14099		   ARM_HARD_FRAME_POINTER_REGNUM, work_register);
14100    }
14101  /* Optimization:  If we are not pushing any low registers but we are going
14102     to push some high registers then delay our first push.  This will just
14103     be a push of LR and we can combine it with the push of the first high
14104     register.  */
14105  else if ((l_mask & 0xff) != 0
14106	   || (high_regs_pushed == 0 && l_mask))
14107    thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
14108
14109  if (high_regs_pushed)
14110    {
14111      unsigned pushable_regs;
14112      unsigned next_hi_reg;
14113
14114      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
14115	if (live_regs_mask & (1 << next_hi_reg))
14116	  break;
14117
14118      pushable_regs = l_mask & 0xff;
14119
14120      if (pushable_regs == 0)
14121	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
14122
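      /* Illustrative sketch (not real compiler output): if r8 and r9
	 are live and r4 is the only pushable low register, the loop
	 below emits

	     mov	r4, r9
	     push	{r4}		@ really saves r9
	     mov	r4, r8
	     push	{r4}		@ really saves r8

	 recording r9 and r8 in real_regs_mask so that the unwind
	 information names the registers actually saved.  */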
14123      while (high_regs_pushed > 0)
14124	{
14125	  unsigned long real_regs_mask = 0;
14126
14127	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
14128	    {
14129	      if (pushable_regs & (1 << regno))
14130		{
14131		  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
14132
14133		  high_regs_pushed --;
14134		  real_regs_mask |= (1 << next_hi_reg);
14135
14136		  if (high_regs_pushed)
14137		    {
14138		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
14139			   next_hi_reg --)
14140			if (live_regs_mask & (1 << next_hi_reg))
14141			  break;
14142		    }
14143		  else
14144		    {
14145		      pushable_regs &= ~((1 << regno) - 1);
14146		      break;
14147		    }
14148		}
14149	    }
14150
14151	  /* If we had to find a work register and we have not yet
14152	     saved the LR then add it to the list of regs to push.  */
14153	  if (l_mask == (1 << LR_REGNUM))
14154	    {
14155	      thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
14156			     1, &cfa_offset,
14157			     real_regs_mask | (1 << LR_REGNUM));
14158	      l_mask = 0;
14159	    }
14160	  else
14161	    thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
14162	}
14163    }
14164}
14165
14166/* Handle the case of a double word load into a low register from
14167   a computed memory address.  The computed address may involve a
14168   register which is overwritten by the load.  */
14169const char *
14170thumb_load_double_from_address (rtx *operands)
14171{
14172  rtx addr;
14173  rtx base;
14174  rtx offset;
14175  rtx arg1;
14176  rtx arg2;
14177
14178  gcc_assert (GET_CODE (operands[0]) == REG);
14179  gcc_assert (GET_CODE (operands[1]) == MEM);
14180
14181  /* Get the memory address.  */
14182  addr = XEXP (operands[1], 0);
14183
14184  /* Work out how the memory address is computed.  */
14185  switch (GET_CODE (addr))
14186    {
14187    case REG:
14188      operands[2] = adjust_address (operands[1], SImode, 4);
14189
14190      if (REGNO (operands[0]) == REGNO (addr))
14191	{
14192	  output_asm_insn ("ldr\t%H0, %2", operands);
14193	  output_asm_insn ("ldr\t%0, %1", operands);
14194	}
14195      else
14196	{
14197	  output_asm_insn ("ldr\t%0, %1", operands);
14198	  output_asm_insn ("ldr\t%H0, %2", operands);
14199	}
14200      break;
14201
14202    case CONST:
14203      /* Compute <address> + 4 for the high order load.  */
14204      operands[2] = adjust_address (operands[1], SImode, 4);
14205
14206      output_asm_insn ("ldr\t%0, %1", operands);
14207      output_asm_insn ("ldr\t%H0, %2", operands);
14208      break;
14209
14210    case PLUS:
14211      arg1   = XEXP (addr, 0);
14212      arg2   = XEXP (addr, 1);
14213
14214      if (CONSTANT_P (arg1))
14215	base = arg2, offset = arg1;
14216      else
14217	base = arg1, offset = arg2;
14218
14219      gcc_assert (GET_CODE (base) == REG);
14220
14221      /* Catch the case of <address> = <reg> + <reg> */
14222      if (GET_CODE (offset) == REG)
14223	{
14224	  int reg_offset = REGNO (offset);
14225	  int reg_base   = REGNO (base);
14226	  int reg_dest   = REGNO (operands[0]);
14227
14228	  /* Add the base and offset registers together into the
14229             higher destination register.  */
14230	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
14231		       reg_dest + 1, reg_base, reg_offset);
14232
14233	  /* Load the lower destination register from the address in
14234             the higher destination register.  */
14235	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
14236		       reg_dest, reg_dest + 1);
14237
14238	  /* Load the higher destination register from its own address
14239             plus 4.  */
14240	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
14241		       reg_dest + 1, reg_dest + 1);
14242	}
14243      else
14244	{
14245	  /* Compute <address> + 4 for the high order load.  */
14246	  operands[2] = adjust_address (operands[1], SImode, 4);
14247
14248	  /* If the computed address is held in the low order register
14249	     then load the high order register first, otherwise always
14250	     load the low order register first.  */
14251	  if (REGNO (operands[0]) == REGNO (base))
14252	    {
14253	      output_asm_insn ("ldr\t%H0, %2", operands);
14254	      output_asm_insn ("ldr\t%0, %1", operands);
14255	    }
14256	  else
14257	    {
14258	      output_asm_insn ("ldr\t%0, %1", operands);
14259	      output_asm_insn ("ldr\t%H0, %2", operands);
14260	    }
14261	}
14262      break;
14263
14264    case LABEL_REF:
14265      /* With no registers to worry about we can just load the value
14266         directly.  */
14267      operands[2] = adjust_address (operands[1], SImode, 4);
14268
14269      output_asm_insn ("ldr\t%H0, %2", operands);
14270      output_asm_insn ("ldr\t%0, %1", operands);
14271      break;
14272
14273    default:
14274      gcc_unreachable ();
14275    }
14276
14277  return "";
14278}
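
/* Illustrative example of the overwrite hazard handled above: for a
   DImode load with operands[0] == r0 and the address also held in r0,
   the REG case emits

	ldr	r1, [r0, #4]	@ high word first; base still intact
	ldr	r0, [r0]	@ low word last, clobbering the base

   since loading r0 first would destroy the address needed for r1.  */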
14279
14280const char *
14281thumb_output_move_mem_multiple (int n, rtx *operands)
14282{
14283  rtx tmp;
14284
14285  switch (n)
14286    {
14287    case 2:
14288      if (REGNO (operands[4]) > REGNO (operands[5]))
14289	{
14290	  tmp = operands[4];
14291	  operands[4] = operands[5];
14292	  operands[5] = tmp;
14293	}
14294      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
14295      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
14296      break;
14297
14298    case 3:
14299      if (REGNO (operands[4]) > REGNO (operands[5]))
14300	{
14301	  tmp = operands[4];
14302	  operands[4] = operands[5];
14303	  operands[5] = tmp;
14304	}
14305      if (REGNO (operands[5]) > REGNO (operands[6]))
14306	{
14307	  tmp = operands[5];
14308	  operands[5] = operands[6];
14309	  operands[6] = tmp;
14310	}
14311      if (REGNO (operands[4]) > REGNO (operands[5]))
14312	{
14313	  tmp = operands[4];
14314	  operands[4] = operands[5];
14315	  operands[5] = tmp;
14316	}
14317
14318      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
14319      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
14320      break;
14321
14322    default:
14323      gcc_unreachable ();
14324    }
14325
14326  return "";
14327}
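
/* The swaps above matter because the register lists are printed in
   operand order and the assembler expects ldmia/stmia lists in
   ascending register order; sorting operands 4-6 keeps, e.g.,
   "{%4, %5}" well-formed whichever scratch registers were allocated.  */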
14328
14329/* Output a call-via instruction for thumb state.  */
14330const char *
14331thumb_call_via_reg (rtx reg)
14332{
14333  int regno = REGNO (reg);
14334  rtx *labelp;
14335
14336  gcc_assert (regno < LR_REGNUM);
14337
14338  /* If we are in the normal text section we can use a single instance
14339     per compilation unit.  If we are doing function sections, then we need
14340     an entry per section, since we can't rely on reachability.  */
14341  if (in_section == text_section)
14342    {
14343      thumb_call_reg_needed = 1;
14344
14345      if (thumb_call_via_label[regno] == NULL)
14346	thumb_call_via_label[regno] = gen_label_rtx ();
14347      labelp = thumb_call_via_label + regno;
14348    }
14349  else
14350    {
14351      if (cfun->machine->call_via[regno] == NULL)
14352	cfun->machine->call_via[regno] = gen_label_rtx ();
14353      labelp = cfun->machine->call_via + regno;
14354    }
14355
14356  output_asm_insn ("bl\t%a0", labelp);
14357  return "";
14358}
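
/* Sketch of the resulting code (the label name is invented for
   illustration; the real one is an internal L-label): a call through
   r5 becomes

	bl	.LCV5		@ emitted here
	...
   .LCV5:
	bx	r5		@ emitted later, by arm_file_end for
				@ the text-section case

   so only one bx veneer per register is needed per section.  */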
14359
14360/* Routines for generating rtl.  */
14361void
14362thumb_expand_movmemqi (rtx *operands)
14363{
14364  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
14365  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
14366  HOST_WIDE_INT len = INTVAL (operands[2]);
14367  HOST_WIDE_INT offset = 0;
14368
14369  while (len >= 12)
14370    {
14371      emit_insn (gen_movmem12b (out, in, out, in));
14372      len -= 12;
14373    }
14374
14375  if (len >= 8)
14376    {
14377      emit_insn (gen_movmem8b (out, in, out, in));
14378      len -= 8;
14379    }
14380
14381  if (len >= 4)
14382    {
14383      rtx reg = gen_reg_rtx (SImode);
14384      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
14385      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
14386      len -= 4;
14387      offset += 4;
14388    }
14389
14390  if (len >= 2)
14391    {
14392      rtx reg = gen_reg_rtx (HImode);
14393      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
14394					      plus_constant (in, offset))));
14395      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
14396			    reg));
14397      len -= 2;
14398      offset += 2;
14399    }
14400
14401  if (len)
14402    {
14403      rtx reg = gen_reg_rtx (QImode);
14404      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
14405					      plus_constant (in, offset))));
14406      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
14407			    reg));
14408    }
14409}
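
/* Worked example (made-up size): a 23-byte copy becomes one 12-byte
   block move (23 -> 11), one 8-byte block move (11 -> 3), then a
   halfword copy at offset 0 and a byte copy at offset 2, both relative
   to the pointers as auto-incremented by the block moves.  */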
14410
14411void
14412thumb_reload_out_hi (rtx *operands)
14413{
14414  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
14415}
14416
14417/* Handle reading a half-word from memory during reload.  */
14418void
14419thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
14420{
14421  gcc_unreachable ();
14422}
14423
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
14426static int
14427arm_get_strip_length (int c)
14428{
14429  switch (c)
14430    {
14431    ARM_NAME_ENCODING_LENGTHS
14432      default: return 0;
14433    }
14434}
14435
14436/* Return a pointer to a function's name with any
14437   and all prefix encodings stripped from it.  */
14438const char *
14439arm_strip_name_encoding (const char *name)
14440{
14441  int skip;
14442
14443  while ((skip = arm_get_strip_length (* name)))
14444    name += skip;
14445
14446  return name;
14447}
14448
14449/* If there is a '*' anywhere in the name's prefix, then
14450   emit the stripped name verbatim, otherwise prepend an
14451   underscore if leading underscores are being used.  */
14452void
14453arm_asm_output_labelref (FILE *stream, const char *name)
14454{
14455  int skip;
14456  int verbatim = 0;
14457
14458  while ((skip = arm_get_strip_length (* name)))
14459    {
14460      verbatim |= (*name == '*');
14461      name += skip;
14462    }
14463
14464  if (verbatim)
14465    fputs (name, stream);
14466  else
14467    asm_fprintf (stream, "%U%s", name);
14468}
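
/* For example (assuming '*' is among the encodings listed in
   ARM_NAME_ENCODING_LENGTHS): a name encoded as "*foo" is emitted
   verbatim as "foo", while a plain "bar" is printed with the
   user-label prefix via %U.  */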
14469
14470static void
14471arm_file_start (void)
14472{
14473  int val;
14474
14475  if (TARGET_BPABI)
14476    {
14477      const char *fpu_name;
14478      if (arm_select[0].string)
14479	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
14480      else if (arm_select[1].string)
14481	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
14482      else
14483	asm_fprintf (asm_out_file, "\t.cpu %s\n",
14484		     all_cores[arm_default_cpu].name);
14485
14486      if (TARGET_SOFT_FLOAT)
14487	{
14488	  if (TARGET_VFP)
14489	    fpu_name = "softvfp";
14490	  else
14491	    fpu_name = "softfpa";
14492	}
14493      else
14494	{
14495	  switch (arm_fpu_arch)
14496	    {
14497	    case FPUTYPE_FPA:
14498	      fpu_name = "fpa";
14499	      break;
14500	    case FPUTYPE_FPA_EMU2:
14501	      fpu_name = "fpe2";
14502	      break;
14503	    case FPUTYPE_FPA_EMU3:
14504	      fpu_name = "fpe3";
14505	      break;
14506	    case FPUTYPE_MAVERICK:
14507	      fpu_name = "maverick";
14508	      break;
14509	    case FPUTYPE_VFP:
14510	      if (TARGET_HARD_FLOAT)
14511		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
14512	      if (TARGET_HARD_FLOAT_ABI)
14513		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
14514	      fpu_name = "vfp";
14515	      break;
14516	    default:
	      abort ();
14518	    }
14519	}
14520      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
14521
      /* Some of these attributes only apply when the corresponding features
	 are used.  However, we don't have any easy way of figuring this
	 out.  Conservatively record the setting that would have been used.  */
14525
14526      /* Tag_ABI_PCS_wchar_t.  */
14527      asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
14528		   (int)WCHAR_TYPE_SIZE / BITS_PER_UNIT);
14529
14530      /* Tag_ABI_FP_rounding.  */
14531      if (flag_rounding_math)
14532	asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
14533      if (!flag_unsafe_math_optimizations)
14534	{
	  /* Tag_ABI_FP_denormal.  */
14536	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
14537	  /* Tag_ABI_FP_exceptions.  */
14538	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
14539	}
14540      /* Tag_ABI_FP_user_exceptions.  */
14541      if (flag_signaling_nans)
14542	asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
14543      /* Tag_ABI_FP_number_model.  */
14544      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
14545		   flag_finite_math_only ? 1 : 3);
14546
14547      /* Tag_ABI_align8_needed.  */
14548      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
14549      /* Tag_ABI_align8_preserved.  */
14550      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
14551      /* Tag_ABI_enum_size.  */
14552      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
14553		   flag_short_enums ? 1 : 2);
14554
14555      /* Tag_ABI_optimization_goals.  */
14556      if (optimize_size)
14557	val = 4;
14558      else if (optimize >= 2)
14559	val = 2;
14560      else if (optimize)
14561	val = 1;
14562      else
14563	val = 6;
14564      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
14565    }
  default_file_start ();
14567}
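
/* Illustrative output (the exact directives vary with the options
   given): compiling with -Os for a BPABI target might begin the
   file with

	.cpu arm926ej-s
	.fpu softfpa
	.eabi_attribute 18, 4	@ Tag_ABI_PCS_wchar_t
	...
	.eabi_attribute 30, 4	@ Tag_ABI_optimization_goals: size

   followed by whatever default_file_start emits.  */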
14568
14569static void
14570arm_file_end (void)
14571{
14572  int regno;
14573
14574  if (! thumb_call_reg_needed)
14575    return;
14576
14577  switch_to_section (text_section);
14578  asm_fprintf (asm_out_file, "\t.code 16\n");
14579  ASM_OUTPUT_ALIGN (asm_out_file, 1);
14580
14581  for (regno = 0; regno < LR_REGNUM; regno++)
14582    {
14583      rtx label = thumb_call_via_label[regno];
14584
14585      if (label != 0)
14586	{
14587	  targetm.asm_out.internal_label (asm_out_file, "L",
14588					  CODE_LABEL_NUMBER (label));
14589	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14590	}
14591    }
14592}
14593
14594rtx aof_pic_label;
14595
14596#ifdef AOF_ASSEMBLER
14597/* Special functions only needed when producing AOF syntax assembler.  */
14598
14599struct pic_chain
14600{
14601  struct pic_chain * next;
14602  const char * symname;
14603};
14604
14605static struct pic_chain * aof_pic_chain = NULL;
14606
14607rtx
14608aof_pic_entry (rtx x)
14609{
14610  struct pic_chain ** chainp;
14611  int offset;
14612
14613  if (aof_pic_label == NULL_RTX)
14614    {
14615      aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
14616    }
14617
14618  for (offset = 0, chainp = &aof_pic_chain; *chainp;
14619       offset += 4, chainp = &(*chainp)->next)
14620    if ((*chainp)->symname == XSTR (x, 0))
14621      return plus_constant (aof_pic_label, offset);
14622
14623  *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
14624  (*chainp)->next = NULL;
14625  (*chainp)->symname = XSTR (x, 0);
14626  return plus_constant (aof_pic_label, offset);
14627}
14628
14629void
14630aof_dump_pic_table (FILE *f)
14631{
14632  struct pic_chain * chain;
14633
14634  if (aof_pic_chain == NULL)
14635    return;
14636
14637  asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
14638	       PIC_OFFSET_TABLE_REGNUM,
14639	       PIC_OFFSET_TABLE_REGNUM);
14640  fputs ("|x$adcons|\n", f);
14641
14642  for (chain = aof_pic_chain; chain; chain = chain->next)
14643    {
14644      fputs ("\tDCD\t", f);
14645      assemble_name (f, chain->symname);
14646      fputs ("\n", f);
14647    }
14648}
14649
14650int arm_text_section_count = 1;
14651
14652/* A get_unnamed_section callback for switching to the text section.  */
14653
14654static void
14655aof_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
14656{
14657  fprintf (asm_out_file, "\tAREA |C$$code%d|, CODE, READONLY",
14658	   arm_text_section_count++);
14659  if (flag_pic)
14660    fprintf (asm_out_file, ", PIC, REENTRANT");
14661  fprintf (asm_out_file, "\n");
14662}
14663
14664static int arm_data_section_count = 1;
14665
14666/* A get_unnamed_section callback for switching to the data section.  */
14667
14668static void
14669aof_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
14670{
14671  fprintf (asm_out_file, "\tAREA |C$$data%d|, DATA\n",
14672	   arm_data_section_count++);
14673}
14674
14675/* Implement TARGET_ASM_INIT_SECTIONS.
14676
14677   AOF Assembler syntax is a nightmare when it comes to areas, since once
14678   we change from one area to another, we can't go back again.  Instead,
14679   we must create a new area with the same attributes and add the new output
14680   to that.  Unfortunately, there is nothing we can do here to guarantee that
14681   two areas with the same attributes will be linked adjacently in the
14682   resulting executable, so we have to be careful not to do pc-relative
14683   addressing across such boundaries.  */
14684
14685static void
14686aof_asm_init_sections (void)
14687{
14688  text_section = get_unnamed_section (SECTION_CODE,
14689				      aof_output_text_section_asm_op, NULL);
14690  data_section = get_unnamed_section (SECTION_WRITE,
14691				      aof_output_data_section_asm_op, NULL);
14692  readonly_data_section = text_section;
14693}
14694
14695void
14696zero_init_section (void)
14697{
14698  static int zero_init_count = 1;
14699
14700  fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n", zero_init_count++);
14701  in_section = NULL;
14702}
14703
/* The AOF assembler is religiously strict about declarations of
   imported and exported symbols, so it is impossible to declare a
   function as imported near the beginning of the file and then to
   export it later on.  It is, however, possible to delay the decision
   until all the functions in the file have been compiled.  To get
   around this, we maintain a list of the imports and exports, and
   delete from it any that are subsequently defined.  At the end of
   compilation we spit the remainder of the list out before the END
   directive.  */
14713
14714struct import
14715{
14716  struct import * next;
14717  const char * name;
14718};
14719
14720static struct import * imports_list = NULL;
14721
14722void
14723aof_add_import (const char *name)
14724{
14725  struct import * new;
14726
14727  for (new = imports_list; new; new = new->next)
14728    if (new->name == name)
14729      return;
14730
14731  new = (struct import *) xmalloc (sizeof (struct import));
14732  new->next = imports_list;
14733  imports_list = new;
14734  new->name = name;
14735}
14736
14737void
14738aof_delete_import (const char *name)
14739{
14740  struct import ** old;
14741
14742  for (old = &imports_list; *old; old = & (*old)->next)
14743    {
14744      if ((*old)->name == name)
14745	{
14746	  *old = (*old)->next;
14747	  return;
14748	}
14749    }
14750}
14751
14752int arm_main_function = 0;
14753
14754static void
14755aof_dump_imports (FILE *f)
14756{
  /* The AOF assembler needs this to cause the startup code to be extracted
     from the library.  Bringing in __main causes the whole thing to work
     automagically.  */
14760  if (arm_main_function)
14761    {
14762      switch_to_section (text_section);
14763      fputs ("\tIMPORT __main\n", f);
14764      fputs ("\tDCD __main\n", f);
14765    }
14766
14767  /* Now dump the remaining imports.  */
14768  while (imports_list)
14769    {
14770      fprintf (f, "\tIMPORT\t");
14771      assemble_name (f, imports_list->name);
14772      fputc ('\n', f);
14773      imports_list = imports_list->next;
14774    }
14775}
14776
14777static void
14778aof_globalize_label (FILE *stream, const char *name)
14779{
14780  default_globalize_label (stream, name);
14781  if (! strcmp (name, "main"))
14782    arm_main_function = 1;
14783}
14784
14785static void
14786aof_file_start (void)
14787{
14788  fputs ("__r0\tRN\t0\n", asm_out_file);
14789  fputs ("__a1\tRN\t0\n", asm_out_file);
14790  fputs ("__a2\tRN\t1\n", asm_out_file);
14791  fputs ("__a3\tRN\t2\n", asm_out_file);
14792  fputs ("__a4\tRN\t3\n", asm_out_file);
14793  fputs ("__v1\tRN\t4\n", asm_out_file);
14794  fputs ("__v2\tRN\t5\n", asm_out_file);
14795  fputs ("__v3\tRN\t6\n", asm_out_file);
14796  fputs ("__v4\tRN\t7\n", asm_out_file);
14797  fputs ("__v5\tRN\t8\n", asm_out_file);
14798  fputs ("__v6\tRN\t9\n", asm_out_file);
14799  fputs ("__sl\tRN\t10\n", asm_out_file);
14800  fputs ("__fp\tRN\t11\n", asm_out_file);
14801  fputs ("__ip\tRN\t12\n", asm_out_file);
14802  fputs ("__sp\tRN\t13\n", asm_out_file);
14803  fputs ("__lr\tRN\t14\n", asm_out_file);
14804  fputs ("__pc\tRN\t15\n", asm_out_file);
14805  fputs ("__f0\tFN\t0\n", asm_out_file);
14806  fputs ("__f1\tFN\t1\n", asm_out_file);
14807  fputs ("__f2\tFN\t2\n", asm_out_file);
14808  fputs ("__f3\tFN\t3\n", asm_out_file);
14809  fputs ("__f4\tFN\t4\n", asm_out_file);
14810  fputs ("__f5\tFN\t5\n", asm_out_file);
14811  fputs ("__f6\tFN\t6\n", asm_out_file);
14812  fputs ("__f7\tFN\t7\n", asm_out_file);
14813  switch_to_section (text_section);
14814}
14815
14816static void
14817aof_file_end (void)
14818{
14819  if (flag_pic)
14820    aof_dump_pic_table (asm_out_file);
14821  arm_file_end ();
14822  aof_dump_imports (asm_out_file);
14823  fputs ("\tEND\n", asm_out_file);
14824}
14825#endif /* AOF_ASSEMBLER */
14826
14827#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */
14833
14834static void
14835arm_encode_section_info (tree decl, rtx rtl, int first)
14836{
14837  /* This doesn't work with AOF syntax, since the string table may be in
14838     a different AREA.  */
14839#ifndef AOF_ASSEMBLER
14840  if (optimize > 0 && TREE_CONSTANT (decl))
14841    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14842#endif
14843
  /* If we are referencing a function that is weak then encode a long call
     flag in the function name, otherwise if the function is static or
     known to be defined in this file then encode a short call flag.  */
14847  if (first && DECL_P (decl))
14848    {
14849      if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14850        arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14851      else if (! TREE_PUBLIC (decl))
14852        arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14853    }
14854
14855  default_encode_section_info (decl, rtl, first);
14856}
14857#endif /* !ARM_PE */
14858
14859static void
14860arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14861{
14862  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14863      && !strcmp (prefix, "L"))
14864    {
14865      arm_ccfsm_state = 0;
14866      arm_target_insn = NULL;
14867    }
14868  default_internal_label (stream, prefix, labelno);
14869}
14870
14871/* Output code to add DELTA to the first argument, and then jump
14872   to FUNCTION.  Used for C++ multiple inheritance.  */
14873static void
14874arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14875		     HOST_WIDE_INT delta,
14876		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14877		     tree function)
14878{
14879  static int thunk_label = 0;
14880  char label[256];
14881  char labelpc[256];
14882  int mi_delta = delta;
14883  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14884  int shift = 0;
14885  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14886                    ? 1 : 0);
14887  if (mi_delta < 0)
14888    mi_delta = - mi_delta;
14889  if (TARGET_THUMB)
14890    {
14891      int labelno = thunk_label++;
14892      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14893      fputs ("\tldr\tr12, ", file);
14894      assemble_name (file, label);
14895      fputc ('\n', file);
14896      if (flag_pic)
14897	{
14898	  /* If we are generating PIC, the ldr instruction below loads
14899	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
14900	     the address of the add + 8, so we have:
14901
14902	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
14903	         = target + 1.
14904
14905	     Note that we have "+ 1" because some versions of GNU ld
14906	     don't set the low bit of the result for R_ARM_REL32
14907	     relocations against thumb function symbols.  */
14908	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
14909	  assemble_name (file, labelpc);
14910	  fputs (":\n", file);
14911	  fputs ("\tadd\tr12, pc, r12\n", file);
14912	}
14913    }
14914  while (mi_delta != 0)
14915    {
14916      if ((mi_delta & (3 << shift)) == 0)
14917        shift += 2;
14918      else
14919        {
14920          asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14921                       mi_op, this_regno, this_regno,
14922                       mi_delta & (0xff << shift));
14923          mi_delta &= ~(0xff << shift);
14924          shift += 8;
14925        }
14926    }
14927  if (TARGET_THUMB)
14928    {
14929      fprintf (file, "\tbx\tr12\n");
14930      ASM_OUTPUT_ALIGN (file, 2);
14931      assemble_name (file, label);
14932      fputs (":\n", file);
14933      if (flag_pic)
14934	{
14935	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
14936	  rtx tem = XEXP (DECL_RTL (function), 0);
14937	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
14938	  tem = gen_rtx_MINUS (GET_MODE (tem),
14939			       tem,
14940			       gen_rtx_SYMBOL_REF (Pmode,
14941						   ggc_strdup (labelpc)));
14942	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
14943	}
14944      else
14945	/* Output ".word .LTHUNKn".  */
14946	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14947    }
14948  else
14949    {
14950      fputs ("\tb\t", file);
14951      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14952      if (NEED_PLT_RELOC)
14953        fputs ("(PLT)", file);
14954      fputc ('\n', file);
14955    }
14956}
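
/* The loop above splits DELTA into immediates an ARM add/sub can
   encode (an 8-bit value at an even shift).  Worked example with
   invented numbers: delta == 0x10004, with the this pointer in r0,
   emits "add r0, r0, #4" followed by "add r0, r0, #65536".  */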
14957
14958int
14959arm_emit_vector_const (FILE *file, rtx x)
14960{
14961  int i;
14962  const char * pattern;
14963
14964  gcc_assert (GET_CODE (x) == CONST_VECTOR);
14965
14966  switch (GET_MODE (x))
14967    {
14968    case V2SImode: pattern = "%08x"; break;
14969    case V4HImode: pattern = "%04x"; break;
14970    case V8QImode: pattern = "%02x"; break;
14971    default:       gcc_unreachable ();
14972    }
14973
14974  fprintf (file, "0x");
14975  for (i = CONST_VECTOR_NUNITS (x); i--;)
14976    {
14977      rtx element;
14978
14979      element = CONST_VECTOR_ELT (x, i);
14980      fprintf (file, pattern, INTVAL (element));
14981    }
14982
14983  return 1;
14984}
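
/* E.g. (a sketch): a V4HImode vector {1, 2, 3, 4} prints as
   "0x0004000300020001" -- elements are emitted from the highest index
   downwards, four hex digits each.  */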
14985
14986const char *
14987arm_output_load_gr (rtx *operands)
14988{
14989  rtx reg;
14990  rtx offset;
14991  rtx wcgr;
14992  rtx sum;
14993
14994  if (GET_CODE (operands [1]) != MEM
14995      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14996      || GET_CODE (reg = XEXP (sum, 0)) != REG
14997      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14998      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14999    return "wldrw%?\t%0, %1";
15000
15001  /* Fix up an out-of-range load of a GR register.  */
15002  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
15003  wcgr = operands[0];
15004  operands[0] = reg;
15005  output_asm_insn ("ldr%?\t%0, %1", operands);
15006
15007  operands[0] = wcgr;
15008  operands[1] = reg;
15009  output_asm_insn ("tmcr%?\t%0, %1", operands);
15010  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
15011
15012  return "";
15013}
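
/* Sketch of the expansion above (illustrative registers): for a load
   such as "wldrw wcgr0, [r4, #2048]", whose offset is out of range,
   we emit

	str	r4, [sp, #-4]!	@ spill the base register
	ldr	r4, [r4, #2048]	@ load the value, reusing the base
	tmcr	wcgr0, r4	@ transfer it to the wireless CGR
	ldr	r4, [sp], #4	@ restore the base register.  */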
15014
15015/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
15016
15017   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
15018   named arg and all anonymous args onto the stack.
15019   XXX I know the prologue shouldn't be pushing registers, but it is faster
15020   that way.  */
15021
15022static void
15023arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
15024			    enum machine_mode mode ATTRIBUTE_UNUSED,
15025			    tree type ATTRIBUTE_UNUSED,
15026			    int *pretend_size,
15027			    int second_time ATTRIBUTE_UNUSED)
15028{
15029  cfun->machine->uses_anonymous_args = 1;
15030  if (cum->nregs < NUM_ARG_REGS)
15031    *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
15032}
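
/* Example (illustrative): for "int f (int a, int b, ...)" two argument
   registers are named, so cum->nregs == 2 and *pretend_size becomes
   (4 - 2) * 4 == 8, making the prologue push r2 and r3.  */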
15033
15034/* Return nonzero if the CONSUMER instruction (a store) does not need
15035   PRODUCER's value to calculate the address.  */
15036
15037int
15038arm_no_early_store_addr_dep (rtx producer, rtx consumer)
15039{
15040  rtx value = PATTERN (producer);
15041  rtx addr = PATTERN (consumer);
15042
15043  if (GET_CODE (value) == COND_EXEC)
15044    value = COND_EXEC_CODE (value);
15045  if (GET_CODE (value) == PARALLEL)
15046    value = XVECEXP (value, 0, 0);
15047  value = XEXP (value, 0);
15048  if (GET_CODE (addr) == COND_EXEC)
15049    addr = COND_EXEC_CODE (addr);
15050  if (GET_CODE (addr) == PARALLEL)
15051    addr = XVECEXP (addr, 0, 0);
15052  addr = XEXP (addr, 0);
15053
15054  return !reg_overlap_mentioned_p (value, addr);
15055}
15056
15057/* Return nonzero if the CONSUMER instruction (an ALU op) does not
15058   have an early register shift value or amount dependency on the
15059   result of PRODUCER.  */
15060
15061int
15062arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
15063{
15064  rtx value = PATTERN (producer);
15065  rtx op = PATTERN (consumer);
15066  rtx early_op;
15067
15068  if (GET_CODE (value) == COND_EXEC)
15069    value = COND_EXEC_CODE (value);
15070  if (GET_CODE (value) == PARALLEL)
15071    value = XVECEXP (value, 0, 0);
15072  value = XEXP (value, 0);
15073  if (GET_CODE (op) == COND_EXEC)
15074    op = COND_EXEC_CODE (op);
15075  if (GET_CODE (op) == PARALLEL)
15076    op = XVECEXP (op, 0, 0);
15077  op = XEXP (op, 1);
15078
15079  early_op = XEXP (op, 0);
15080  /* This is either an actual independent shift, or a shift applied to
15081     the first operand of another operation.  We want the whole shift
15082     operation.  */
15083  if (GET_CODE (early_op) == REG)
15084    early_op = op;
15085
15086  return !reg_overlap_mentioned_p (value, early_op);
15087}
15088
15089/* Return nonzero if the CONSUMER instruction (an ALU op) does not
15090   have an early register shift value dependency on the result of
15091   PRODUCER.  */
15092
15093int
15094arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
15095{
15096  rtx value = PATTERN (producer);
15097  rtx op = PATTERN (consumer);
15098  rtx early_op;
15099
15100  if (GET_CODE (value) == COND_EXEC)
15101    value = COND_EXEC_CODE (value);
15102  if (GET_CODE (value) == PARALLEL)
15103    value = XVECEXP (value, 0, 0);
15104  value = XEXP (value, 0);
15105  if (GET_CODE (op) == COND_EXEC)
15106    op = COND_EXEC_CODE (op);
15107  if (GET_CODE (op) == PARALLEL)
15108    op = XVECEXP (op, 0, 0);
15109  op = XEXP (op, 1);
15110
15111  early_op = XEXP (op, 0);
15112
15113  /* This is either an actual independent shift, or a shift applied to
15114     the first operand of another operation.  We want the value being
15115     shifted, in either case.  */
15116  if (GET_CODE (early_op) != REG)
15117    early_op = XEXP (early_op, 0);
15118
15119  return !reg_overlap_mentioned_p (value, early_op);
15120}
15121
15122/* Return nonzero if the CONSUMER (a mul or mac op) does not
15123   have an early register mult dependency on the result of
15124   PRODUCER.  */
15125
15126int
15127arm_no_early_mul_dep (rtx producer, rtx consumer)
15128{
15129  rtx value = PATTERN (producer);
15130  rtx op = PATTERN (consumer);
15131
15132  if (GET_CODE (value) == COND_EXEC)
15133    value = COND_EXEC_CODE (value);
15134  if (GET_CODE (value) == PARALLEL)
15135    value = XVECEXP (value, 0, 0);
15136  value = XEXP (value, 0);
15137  if (GET_CODE (op) == COND_EXEC)
15138    op = COND_EXEC_CODE (op);
15139  if (GET_CODE (op) == PARALLEL)
15140    op = XVECEXP (op, 0, 0);
15141  op = XEXP (op, 1);
15142
15143  return (GET_CODE (op) == PLUS
15144	  && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
15145}
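
/* Sketch of the test above: for "mla r0, r1, r2, r3" the consumer
   pattern is (set (reg 0) (plus (mult (reg 1) (reg 2)) (reg 3))), so
   only the multiply operands in XEXP (op, 0) count as early; a
   producer writing r3 yields "no early dependency", while one writing
   r1 or r2 does not.  */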
15146
15147
15148/* We can't rely on the caller doing the proper promotion when
15149   using APCS or ATPCS.  */
15150
15151static bool
15152arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
15153{
  return !TARGET_AAPCS_BASED;
15155}
15156
15157
15158/* AAPCS based ABIs use short enums by default.  */
15159
15160static bool
15161arm_default_short_enums (void)
15162{
15163  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
15164}
15165
15166
15167/* AAPCS requires that anonymous bitfields affect structure alignment.  */
15168
15169static bool
15170arm_align_anon_bitfield (void)
15171{
15172  return TARGET_AAPCS_BASED;
15173}
15174
15175
15176/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
15177
15178static tree
15179arm_cxx_guard_type (void)
15180{
15181  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
15182}
15183
15184
15185/* The EABI says test the least significant bit of a guard variable.  */
15186
15187static bool
15188arm_cxx_guard_mask_bit (void)
15189{
15190  return TARGET_AAPCS_BASED;
15191}
15192
15193
15194/* The EABI specifies that all array cookies are 8 bytes long.  */
15195
15196static tree
15197arm_get_cookie_size (tree type)
15198{
15199  tree size;
15200
15201  if (!TARGET_AAPCS_BASED)
15202    return default_cxx_get_cookie_size (type);
15203
15204  size = build_int_cst (sizetype, 8);
15205  return size;
15206}
15207
15208
15209/* The EABI says that array cookies should also contain the element size.  */
15210
15211static bool
15212arm_cookie_has_size (void)
15213{
15214  return TARGET_AAPCS_BASED;
15215}
15216
15217
15218/* The EABI says constructors and destructors should return a pointer to
15219   the object constructed/destroyed.  */
15220
15221static bool
15222arm_cxx_cdtor_returns_this (void)
15223{
15224  return TARGET_AAPCS_BASED;
15225}
15226
15227/* The EABI says that an inline function may never be the key
15228   method.  */
15229
15230static bool
15231arm_cxx_key_method_may_be_inline (void)
15232{
15233  return !TARGET_AAPCS_BASED;
15234}
15235
15236static void
15237arm_cxx_determine_class_data_visibility (tree decl)
15238{
15239  if (!TARGET_AAPCS_BASED)
15240    return;
15241
15242  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
15243     is exported.  However, on systems without dynamic vague linkage,
15244     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
15245  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
15246    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
15247  else
15248    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
15249  DECL_VISIBILITY_SPECIFIED (decl) = 1;
15250}
15251
15252static bool
15253arm_cxx_class_data_always_comdat (void)
15254{
15255  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
15256     vague linkage if the class has no key function.  */
15257  return !TARGET_AAPCS_BASED;
15258}
15259
15260
15261/* The EABI says __aeabi_atexit should be used to register static
15262   destructors.  */
15263
15264static bool
15265arm_cxx_use_aeabi_atexit (void)
15266{
15267  return TARGET_AAPCS_BASED;
15268}
15269
15270
15271void
15272arm_set_return_address (rtx source, rtx scratch)
15273{
15274  arm_stack_offsets *offsets;
15275  HOST_WIDE_INT delta;
15276  rtx addr;
15277  unsigned long saved_regs;
15278
15279  saved_regs = arm_compute_save_reg_mask ();
15280
15281  if ((saved_regs & (1 << LR_REGNUM)) == 0)
15282    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
15283  else
15284    {
15285      if (frame_pointer_needed)
	addr = plus_constant (hard_frame_pointer_rtx, -4);
15287      else
15288	{
15289	  /* LR will be the first saved register.  */
15290	  offsets = arm_get_frame_offsets ();
	  delta = offsets->outgoing_args - (offsets->frame + 4);

15294	  if (delta >= 4096)
15295	    {
15296	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
15297				     GEN_INT (delta & ~4095)));
15298	      addr = scratch;
15299	      delta &= 4095;
15300	    }
15301	  else
15302	    addr = stack_pointer_rtx;
15303
15304	  addr = plus_constant (addr, delta);
15305	}
15306      emit_move_insn (gen_frame_mem (Pmode, addr), source);
15307    }
15308}
15309
15310
15311void
15312thumb_set_return_address (rtx source, rtx scratch)
15313{
15314  arm_stack_offsets *offsets;
15315  HOST_WIDE_INT delta;
15316  int reg;
15317  rtx addr;
15318  unsigned long mask;
15319
15320  emit_insn (gen_rtx_USE (VOIDmode, source));
15321
15322  mask = thumb_compute_save_reg_mask ();
15323  if (mask & (1 << LR_REGNUM))
15324    {
15325      offsets = arm_get_frame_offsets ();
15326
15327      /* Find the saved regs.  */
15328      if (frame_pointer_needed)
15329	{
15330	  delta = offsets->soft_frame - offsets->saved_args;
15331	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
15332	}
15333      else
15334	{
15335	  delta = offsets->outgoing_args - offsets->saved_args;
15336	  reg = SP_REGNUM;
15337	}
15338      /* Allow for the stack frame.  */
15339      if (TARGET_BACKTRACE)
15340	delta -= 16;
15341      /* The link register is always the first saved register.  */
15342      delta -= 4;
15343
15344      /* Construct the address.  */
15345      addr = gen_rtx_REG (SImode, reg);
15346      if ((reg != SP_REGNUM && delta >= 128)
15347	  || delta >= 1024)
15348	{
15349	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
15350	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
15351	  addr = scratch;
15352	}
15353      else
15354	addr = plus_constant (addr, delta);
15355
15356      emit_move_insn (gen_frame_mem (Pmode, addr), source);
15357    }
15358  else
15359    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
15360}
15361
15362/* Implements target hook vector_mode_supported_p.  */
15363bool
15364arm_vector_mode_supported_p (enum machine_mode mode)
15365{
15366  if ((mode == V2SImode)
15367      || (mode == V4HImode)
15368      || (mode == V8QImode))
15369    return true;
15370
15371  return false;
15372}
15373
15374/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
15375   ARM insns and therefore guarantee that the shift count is modulo 256.
15376   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
15377   guarantee no particular behavior for out-of-range counts.  */
15378
15379static unsigned HOST_WIDE_INT
15380arm_shift_truncation_mask (enum machine_mode mode)
15381{
15382  return mode == SImode ? 255 : 0;
15383}
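
/* E.g. (a sketch of the effect): returning 255 for SImode lets the
   compiler fold (x << (n & 255)) into (x << n), matching the
   modulo-256 behavior of ARM register-specified shifts; DImode gets
   mask 0, so no such folding is done.  */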
15384
15385
15386/* Map internal gcc register numbers to DWARF2 register numbers.  */
15387
15388unsigned int
15389arm_dbx_register_number (unsigned int regno)
15390{
15391  if (regno < 16)
15392    return regno;
15393
15394  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
15395     compatibility.  The EABI defines them as registers 96-103.  */
15396  if (IS_FPA_REGNUM (regno))
15397    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
15398
15399  if (IS_VFP_REGNUM (regno))
15400    return 64 + regno - FIRST_VFP_REGNUM;
15401
15402  if (IS_IWMMXT_GR_REGNUM (regno))
15403    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
15404
15405  if (IS_IWMMXT_REGNUM (regno))
15406    return 112 + regno - FIRST_IWMMXT_REGNUM;
15407
15408  gcc_unreachable ();
15409}


#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction.  This should
   only ever be generated by the function prologue code, so we expect it
   to have a particular form.  */

static void
arm_unwind_emit_stm (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  /* First insn will adjust the stack pointer.  */
  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    gcc_unreachable ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
    }
  else if (IS_VFP_REGNUM (reg))
    {
      /* VFP saves use FSTMX, which pushes one word more than the
	 registers themselves occupy.  */
      offset -= 4;
      reg_size = 8;
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    gcc_unreachable ();

  fprintf (asm_out_file, "\t.save {");

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)),
	 where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	gcc_unreachable ();

      /* Registers must be stored in increasing register number order.  */
      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
	gcc_unreachable ();
      lastreg = reg;

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    gcc_unreachable ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	gcc_unreachable ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
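
/* An illustrative example (hypothetical prologue, not real output):
   a push of r4, r5 and lr arrives here as roughly

     (sequence [(set (reg sp) (plus (reg sp) (const_int -12)))
		(set (mem (reg sp)) (reg r4))
		(set (mem (plus (reg sp) (const_int 4))) (reg r5))
		(set (mem (plus (reg sp) (const_int 8))) (reg lr))])

   for which the loop above prints ".save {r4, r5, lr}".  */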

/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	gcc_unreachable ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf (asm_out_file, "{d%d}\n",
		     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    gcc_unreachable ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;
	  unsigned reg;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		gcc_unreachable ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg, offset);
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    gcc_unreachable ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else
	gcc_unreachable ();
      break;

    default:
      gcc_unreachable ();
    }
}
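
/* Illustrative examples of the mapping above (hypothetical insns):

     (set (mem (pre_dec (reg sp))) (reg r4))	     -> .save {r4}
     (set (reg sp) (plus (reg sp) (const_int -16)))  -> .pad #16
     (set (reg fp) (plus (reg sp) (const_int 8)))    -> .setfp fp, sp, #8
     (set (reg r7) (reg sp))			     -> .movsp r7  */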


/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_stm (asm_out_file, pat);
      break;

    default:
      gcc_unreachable ();
    }
}
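
/* For example (hypothetical): a frame-related insn whose pattern is
   (set (reg sp) (plus (reg sp) (const_int -8))) is dispatched to
   arm_unwind_emit_set and yields ".pad #8", while a multi-register
   push carries a REG_FRAME_RELATED_EXPR note wrapping a SEQUENCE and
   goes through arm_unwind_emit_stm instead.  */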


/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
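
/* For example (hypothetical): a catch clause for "int" would emit

	.word	_ZTIi(TARGET2)

   whereas a CONST_INT filter value is emitted without the TARGET2
   decoration.  */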
#endif /* TARGET_UNWIND_INFO */


/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    fputs ("\t.fnend\n", f);
}

/* Output the operand of an UNSPEC_TLS unspec: the symbol itself, the
   assembler decoration selecting the relocation, and, for the
   PIC-relative models, the usual label-difference addend.  */

static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
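
/* For example (hypothetical symbol and labels): a general-dynamic
   access to "x" might be printed as

	.word	x(tlsgd) + (. - .LPIC3 - .L5)

   where the label difference supplies the PIC-relative addend required
   by the GD, LDM and IE models.  */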

/* Output anything that output_addr_const itself cannot handle; used by
   the OUTPUT_ADDR_CONST_EXTRA macro.  Returns TRUE if X was handled.  */

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}

#include "gt-arm.h"