1/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
2   Copyright (C) 1994-2020 Free Software Foundation, Inc.
3
4   Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
5   behalf of Synopsys Inc.
6
   Position Independent Code support added, code cleaned up,
   comments and support for ARC700 instructions added by
   Saurabh Verma (saurabh.verma@codito.com)
   Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
11
12   Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
13   profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>
14
15This file is part of GCC.
16
17GCC is free software; you can redistribute it and/or modify
18it under the terms of the GNU General Public License as published by
19the Free Software Foundation; either version 3, or (at your option)
20any later version.
21
22GCC is distributed in the hope that it will be useful,
23but WITHOUT ANY WARRANTY; without even the implied warranty of
24MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25GNU General Public License for more details.
26
27You should have received a copy of the GNU General Public License
28along with GCC; see the file COPYING3.  If not see
29<http://www.gnu.org/licenses/>.  */
30
31#define IN_TARGET_CODE 1
32
33#include "config.h"
34#include "system.h"
35#include "coretypes.h"
36#include "memmodel.h"
37#include "backend.h"
38#include "target.h"
39#include "rtl.h"
40#include "tree.h"
41#include "cfghooks.h"
42#include "df.h"
43#include "tm_p.h"
44#include "stringpool.h"
45#include "attribs.h"
46#include "optabs.h"
47#include "regs.h"
48#include "emit-rtl.h"
49#include "recog.h"
50#include "diagnostic.h"
51#include "fold-const.h"
52#include "varasm.h"
53#include "stor-layout.h"
54#include "calls.h"
55#include "output.h"
56#include "insn-attr.h"
57#include "flags.h"
58#include "explow.h"
59#include "expr.h"
60#include "langhooks.h"
61#include "tm-constrs.h"
62#include "reload.h" /* For operands_match_p */
63#include "cfgrtl.h"
64#include "tree-pass.h"
65#include "context.h"
66#include "builtins.h"
67#include "rtl-iter.h"
68#include "alias.h"
69#include "opts.h"
70#include "hw-doloop.h"
71
72/* Which cpu we're compiling for (ARC600, ARC601, ARC700).  */
73static char arc_cpu_name[10] = "";
74static const char *arc_cpu_string = arc_cpu_name;
75
76typedef struct GTY (()) _arc_jli_section
77{
78  const char *name;
79  struct _arc_jli_section *next;
80} arc_jli_section;
81
82static arc_jli_section *arc_jli_sections = NULL;
83
/* Track which regs are made fixed / call-saved / call-used from the
   command line.  */
85HARD_REG_SET overrideregs;
86
87/* Maximum size of a loop.  */
88#define ARC_MAX_LOOP_LENGTH 4095
89
90/* Check if an rtx fits in the store instruction format.  Loads can
91   handle any constant.  */
92#define RTX_OK_FOR_OFFSET_P(MODE, X)					\
93  (GET_CODE (X) == CONST_INT						\
94   && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & (~0x03), \
95		       (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3	\
96			? 0						\
97			: -(-GET_MODE_SIZE (MODE) | (~0x03)) >> 1)))
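
/* Illustrative only, assuming SMALL_INT_RANGE implements the usual
   signed 9-bit store offset scaled by the access size: byte stores then
   accept roughly -256..255, HImode even offsets roughly -512..510,
   SImode 4-byte-aligned offsets roughly -1024..1020, and for DFmode the
   upper bound is further trimmed by (GET_MODE_SIZE (mode) - 1) & ~0x03
   so that the second word of the access still fits.  */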
98
99/* Array of valid operand punctuation characters.  */
100char arc_punct_chars[256];
101
102/* State used by arc_ccfsm_advance to implement conditional execution.  */
103struct GTY (()) arc_ccfsm
104{
105  int state;
106  int cc;
107  rtx cond;
108  rtx_insn *target_insn;
109  int target_label;
110};
111
112/* Status of the IRQ_CTRL_AUX register.  */
113typedef struct irq_ctrl_saved_t
114{
115  /* Last register number used by IRQ_CTRL_SAVED aux_reg.  */
116  short irq_save_last_reg;
117  /* True if BLINK is automatically saved.  */
118  bool  irq_save_blink;
119  /* True if LPCOUNT is automatically saved.  */
120  bool  irq_save_lpcount;
121} irq_ctrl_saved_t;
122static irq_ctrl_saved_t irq_ctrl_saved;
123
124#define ARC_AUTOBLINK_IRQ_P(FNTYPE)				\
125  ((ARC_INTERRUPT_P (FNTYPE)					\
126    && irq_ctrl_saved.irq_save_blink)				\
127   || (ARC_FAST_INTERRUPT_P (FNTYPE)				\
128       && rgf_banked_register_count > 8))
129
130#define ARC_AUTOFP_IRQ_P(FNTYPE)				\
131  ((ARC_INTERRUPT_P (FNTYPE)					\
132    && (irq_ctrl_saved.irq_save_last_reg > 26))			\
133  || (ARC_FAST_INTERRUPT_P (FNTYPE)				\
134      && rgf_banked_register_count > 8))
135
136#define ARC_AUTO_IRQ_P(FNTYPE)					\
137  (ARC_INTERRUPT_P (FNTYPE) && !ARC_FAST_INTERRUPT_P (FNTYPE)	\
138   && (irq_ctrl_saved.irq_save_blink				\
139       || (irq_ctrl_saved.irq_save_last_reg >= 0)))
140
141/* Number of registers in second bank for FIRQ support.  */
142static int rgf_banked_register_count;
143
144#define arc_ccfsm_current cfun->machine->ccfsm_current
145
146#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
147  ((STATE)->state == 1 || (STATE)->state == 2)
148
149/* Indicate we're conditionalizing insns now.  */
150#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
151  ((STATE)->state += 2)
152
153#define ARC_CCFSM_COND_EXEC_P(STATE) \
154  ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
155   || current_insn_predicate)
156
157/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE.  */
158#define CCFSM_ISCOMPACT(INSN,STATE) \
159  (ARC_CCFSM_COND_EXEC_P (STATE) \
160   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
161      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
162   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
163
164/* Likewise, but also consider that INSN might be in a delay slot of JUMP.  */
165#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
166  ((ARC_CCFSM_COND_EXEC_P (STATE) \
167    || (JUMP_P (JUMP) \
168	&& INSN_ANNULLED_BRANCH_P (JUMP) \
169	&& (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
170   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
171      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
172   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
173
174/* Start enter/leave register range.  */
175#define ENTER_LEAVE_START_REG 13
176
177/* End enter/leave register range.  */
178#define ENTER_LEAVE_END_REG 26
179
180/* The maximum number of insns skipped which will be conditionalised if
181   possible.  */
182/* When optimizing for speed:
183    Let p be the probability that the potentially skipped insns need to
184    be executed, pn the cost of a correctly predicted non-taken branch,
185    mt the cost of a mis/non-predicted taken branch,
186    mn mispredicted non-taken, pt correctly predicted taken ;
187    costs expressed in numbers of instructions like the ones considered
188    skipping.
189    Unfortunately we don't have a measure of predictability - this
190    is linked to probability only in that in the no-eviction-scenario
191    there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
192    value that can be assumed *if* the distribution is perfectly random.
    A predictability of 1 is perfectly plausible no matter what p is,
194    because the decision could be dependent on an invocation parameter
195    of the program.
196    For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
197    For small p, we want MAX_INSNS_SKIPPED == pt
198
199   When optimizing for size:
    We want to skip insns unless we could use 16-bit opcodes for the
    non-conditionalized insns to balance the branch length or more.
    Performance can be a tie-breaker.  */
/* If the potentially-skipped insns are likely to be executed, we'll
   generally save one non-taken branch; we want this saving to be
   no less than 1/p.  */
207#define MAX_INSNS_SKIPPED 3
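
/* A worked example of the speed formula above, with made-up costs:
   taking pn = 1, mt = 3 and p = 0.75 gives
   pn/(1-p) + mt - pn = 1/0.25 + 3 - 1 = 6, while for small p we would
   only want to skip up to pt insns; the fixed value of 3 sits between
   the two regimes.  */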
208
209/* ZOL control registers.  */
210#define AUX_LP_START 0x02
211#define AUX_LP_END 0x03
212
213/* FPX AUX registers.  */
214#define AUX_DPFP_START 0x301
215
216/* ARC600 MULHI register.  */
217#define AUX_MULHI 0x12
218
219/* A nop is needed between a 4 byte insn that sets the condition codes and
220   a branch that uses them (the same isn't true for an 8 byte insn that sets
221   the condition codes).  Set by arc_ccfsm_advance.  Used by
222   arc_print_operand.  */
223
224static int get_arc_condition_code (rtx);
225
226static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
227static tree arc_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
228static tree arc_handle_jli_attribute (tree *, tree, tree, int, bool *);
229static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
230static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *);
231static tree arc_handle_aux_attribute (tree *, tree, tree, int, bool *);
232
/* Table of machine-specific attributes supported by the ARC target.  */
235const struct attribute_spec arc_attribute_table[] =
236{
237 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
238      affects_type_identity, handler, exclude } */
239  { "interrupt", 1, 1, true, false, false, true,
240    arc_handle_interrupt_attribute, NULL },
241  /* Function calls made to this symbol must be done indirectly, because
242     it may lie outside of the 21/25 bit addressing range of a normal function
243     call.  */
244  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
245  /* Whereas these functions are always known to reside within the 25 bit
246     addressing range of unconditionalized bl.  */
247  { "medium_call",   0, 0, false, true,  true, false, NULL, NULL },
248  /* And these functions are always known to reside within the 21 bit
249     addressing range of blcc.  */
250  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Functions which do not have a prologue and epilogue generated by
     the compiler.  */
253  { "naked", 0, 0, true, false, false,  false, arc_handle_fndecl_attribute,
254    NULL },
  /* Function calls made using the jli instruction.  The pointer in the
     JLI table is found later.  */
257  { "jli_always",    0, 0, false, true,  true, false,  NULL, NULL },
  /* Function calls made using the jli instruction.  The pointer in the
     JLI table is given as an input parameter.  */
260  { "jli_fixed",    1, 1, false, true,  true, false, arc_handle_jli_attribute,
261    NULL },
262  /* Call a function using secure-mode.  */
263  { "secure_call",  1, 1, false, true, true, false, arc_handle_secure_attribute,
264    NULL },
265   /* Bypass caches using .di flag.  */
266  { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute,
267    NULL },
268  { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL },
269  { NULL, 0, 0, false, false, false, false, NULL, NULL }
270};
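
/* A sketch of how these attributes might appear in user code (not
   compiled here; the argument values below are only examples):

     void isr (void) __attribute__ ((interrupt ("ilink")));
     int far_func (int) __attribute__ ((long_call));
     int jli_func (int) __attribute__ ((jli_fixed (2)));
     volatile int aux_var __attribute__ ((aux (0x1000)));  */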
271static int arc_comp_type_attributes (const_tree, const_tree);
272static void arc_file_start (void);
273static void arc_internal_label (FILE *, const char *, unsigned long);
274static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
275				 tree);
276static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
277static void arc_encode_section_info (tree decl, rtx rtl, int first);
278
279static void arc_init_builtins (void);
280static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);
281
282static int branch_dest (rtx);
283
284static void  arc_output_pic_addr_const (FILE *,  rtx, int);
285static bool arc_function_ok_for_sibcall (tree, tree);
286static rtx arc_function_value (const_tree, const_tree, bool);
287const char * output_shift (rtx *);
288static void arc_reorg (void);
289static bool arc_in_small_data_p (const_tree);
290
291static void arc_init_reg_tables (void);
292static bool arc_return_in_memory (const_tree, const_tree);
293static bool arc_vector_mode_supported_p (machine_mode);
294
295static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
296				  unsigned int, bool);
297static const char *arc_invalid_within_doloop (const rtx_insn *);
298
299static void output_short_suffix (FILE *file);
300
301static bool arc_frame_pointer_required (void);
302
303static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
304						unsigned int,
305						enum by_pieces_operation op,
306						bool);
307
308/* Globally visible information about currently selected cpu.  */
309const arc_cpu_t *arc_selected_cpu;
310
311/* Traditionally, we push saved registers first in the prologue,
312   then we allocate the rest of the frame - and reverse in the epilogue.
313   This has still its merits for ease of debugging, or saving code size
314   or even execution time if the stack frame is so large that some accesses
315   can't be encoded anymore with offsets in the instruction code when using
316   a different scheme.
317   Also, it would be a good starting point if we got instructions to help
318   with register save/restore.
319
320   However, often stack frames are small, and the pushing / popping has
321   some costs:
322   - the stack modification prevents a lot of scheduling.
323   - frame allocation / deallocation may need extra instructions.
324   - we need to place a memory barrier after frame allocation to avoid
325     the delay slot scheduler to reschedule a frame related info and
326     messing up with dwarf unwinding.  The barrier before deallocation
327     is for flushing all pending sp operations.
328
329   Thus, for small frames, we'd like to use a different scheme:
330   - The frame is allocated in full with the first prologue instruction,
331     and deallocated in full with the last epilogue instruction.
332     Thus, the instructions in-between can be freely scheduled.
333   - If the function has no outgoing arguments on the stack, we can allocate
334     one register save slot at the top of the stack.  This register can then
335     be saved simultaneously with frame allocation, and restored with
336     frame deallocation.
     This register can be picked depending on scheduling considerations,
     although some thought should go into having some set of registers
     to be potentially lingering after a call, and others to be available
     immediately - i.e. in the absence of interprocedural optimization, we
     can use an ABI-like convention for register allocation to reduce
     stalls after function return.  */
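
/* As a rough illustration of the two schemes, for a small frame that
   only needs to save BLINK (register choices and offsets are made up):

     push-first scheme:            full-allocation scheme:
       push_s  blink                 sub_s   sp,sp,20
       sub_s   sp,sp,16              st      blink,[sp,16]
       ...                           ...
       add_s   sp,sp,16              ld      blink,[sp,16]
       pop_s   blink                 add_s   sp,sp,20

   In the second form the insns between allocation and deallocation can
   be scheduled freely around the BLINK save and restore.  */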
343
344/* ARCompact stack frames look like:
345
346           Before call                     After call
347  high  +-----------------------+       +-----------------------+
348  mem   |  reg parm save area   |       | reg parm save area    |
349        |  only created for     |       | only created for      |
350        |  variable arg fns     |       | variable arg fns      |
351    AP  +-----------------------+       +-----------------------+
352        |  return addr register |       | return addr register  |
353        |  (if required)        |       | (if required)         |
354        +-----------------------+       +-----------------------+
355        |                       |       |                       |
356        |  reg save area        |       | reg save area         |
357        |                       |       |                       |
358        +-----------------------+       +-----------------------+
359        |  frame pointer        |       | frame pointer         |
360        |  (if required)        |       | (if required)         |
361    FP  +-----------------------+       +-----------------------+
362        |                       |       |                       |
363        |  local/temp variables |       | local/temp variables  |
364        |                       |       |                       |
365        +-----------------------+       +-----------------------+
366        |                       |       |                       |
367        |  arguments on stack   |       | arguments on stack    |
368        |                       |       |                       |
369    SP  +-----------------------+       +-----------------------+
370                                        | reg parm save area    |
371                                        | only created for      |
372                                        | variable arg fns      |
373                                    AP  +-----------------------+
374                                        | return addr register  |
375                                        | (if required)         |
376                                        +-----------------------+
377                                        |                       |
378                                        | reg save area         |
379                                        |                       |
380                                        +-----------------------+
381                                        | frame pointer         |
382                                        | (if required)         |
383                                    FP  +-----------------------+
384                                        |                       |
385                                        | local/temp variables  |
386                                        |                       |
387                                        +-----------------------+
388                                        |                       |
389                                        | arguments on stack    |
390  low                                   |                       |
391  mem                               SP  +-----------------------+
392
393Notes:
1) The "reg parm save area" does not exist for non-variable-argument fns.
395   The "reg parm save area" can be eliminated completely if we created our
396   own va-arc.h, but that has tradeoffs as well (so it's not done).  */
397
398/* Structure to be filled in by arc_compute_frame_size with register
399   save masks, and offsets for the current function.  */
400struct GTY (()) arc_frame_info
401{
402  unsigned int total_size;	/* # bytes that the entire frame takes up.  */
403  unsigned int extra_size;	/* # bytes of extra stuff.  */
404  unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
405  unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
406  unsigned int reg_size;	/* # bytes needed to store regs.  */
407  unsigned int var_size;	/* # bytes that variables take up.  */
408  uint64_t gmask;		/* Mask of saved gp registers.  */
  bool initialized;	/* TRUE if frame size has already been calculated.  */
410  short millicode_start_reg;
411  short millicode_end_reg;
412  bool save_return_addr;
413};
414
415/* GMASK bit length -1.  */
416#define GMASK_LEN 63
417
418/* Defining data structures for per-function information.  */
419
420typedef struct GTY (()) machine_function
421{
422  unsigned int fn_type;
423  struct arc_frame_info frame_info;
424  /* To keep track of unalignment caused by short insns.  */
425  int unalign;
426  struct arc_ccfsm ccfsm_current;
427  /* Map from uid to ccfsm state during branch shortening.  */
428  rtx ccfsm_current_insn;
429  char arc_reorg_started;
430  char prescan_initialized;
431} machine_function;
432
433
/* Given a symbol RTX (const (symb <+ const_int>)), return its
   alignment in bytes.  */
436
437static int
438get_symbol_alignment (rtx x)
439{
440  tree decl = NULL_TREE;
441  int align = 0;
442
443  switch (GET_CODE (x))
444    {
445    case SYMBOL_REF:
446      decl = SYMBOL_REF_DECL (x);
447      break;
448    case CONST:
449      return get_symbol_alignment (XEXP (x, 0));
450    case PLUS:
451      gcc_assert (CONST_INT_P (XEXP (x, 1)));
452      return get_symbol_alignment (XEXP (x, 0));
453    default:
454      return 0;
455    }
456
457  if (decl)
458    align = DECL_ALIGN (decl);
459  align = align / BITS_PER_UNIT;
460  return align;
461}
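
/* For example, given the rtx (const (plus (symbol_ref "x") (const_int 4)))
   this walks down to the SYMBOL_REF and returns DECL_ALIGN of "x"
   converted to bytes, or 0 when no decl is attached; the const_int
   offset itself is ignored.  */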
462
/* Return true if X is OK to be used as a small data address.  */
464
465static bool
466legitimate_small_data_address_p (rtx x, machine_mode mode)
467{
468  switch (GET_CODE (x))
469    {
470    case CONST:
471      return legitimate_small_data_address_p (XEXP (x, 0), mode);
472    case SYMBOL_REF:
473      return SYMBOL_REF_SMALL_P (x);
474    case PLUS:
475      {
476	bool p0 = (GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
477	  && SYMBOL_REF_SMALL_P (XEXP (x, 0));
478
479	/* If no constant then we cannot do small data.  */
480	if (!CONST_INT_P (XEXP (x, 1)))
481	  return false;
482
	/* Small data relocs work with scaled addresses; check if
	   the immediate fits the requirements.  */
485	switch (GET_MODE_SIZE (mode))
486	  {
487	  case 1:
488	    return p0;
489	  case 2:
490	    return p0 && ((INTVAL (XEXP (x, 1)) & 0x1) == 0);
491	  case 4:
492	  case 8:
493	    return p0 && ((INTVAL (XEXP (x, 1)) & 0x3) == 0);
494	  default:
495	    return false;
496	  }
497      }
498    default:
499      return false;
500    }
501}
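
/* For instance, with a small-data symbol "s", (symbol_ref "s") is
   accepted for any mode, while (const (plus (symbol_ref "s") (const_int 3)))
   is accepted only for QImode accesses, because the sdata relocations
   use scaled addresses and therefore need a suitably aligned offset.  */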
502
/* Return TRUE if OP is a scaled address.  */
504static bool
505legitimate_scaled_address_p (machine_mode mode, rtx op, bool strict)
506{
507  if (GET_CODE (op) != PLUS)
508    return false;
509
510  if (GET_CODE (XEXP (op, 0)) != MULT)
511    return false;
512
513  /* Check multiplication operands.  */
514  if (!RTX_OK_FOR_INDEX_P (XEXP (XEXP (op, 0), 0), strict))
515    return false;
516
517  if (!CONST_INT_P (XEXP (XEXP (op, 0), 1)))
518    return false;
519
520  switch (GET_MODE_SIZE (mode))
521    {
522    case 2:
523      if (INTVAL (XEXP (XEXP (op, 0), 1)) != 2)
524	return false;
525      break;
526    case 8:
527      if (!TARGET_LL64)
528	return false;
529      /*  Fall through. */
530    case 4:
531      if (INTVAL (XEXP (XEXP (op, 0), 1)) != 4)
532	return false;
      break;
534    default:
535      return false;
536    }
537
538  /* Check the base.  */
539  if (RTX_OK_FOR_BASE_P (XEXP (op, 1), (strict)))
540    return true;
541
542  if (flag_pic)
543    {
544      if (CONST_INT_P (XEXP (op, 1)))
545	return true;
546      return false;
547    }
548
  /* Scaled addresses for sdata are handled elsewhere.  */
550  if (legitimate_small_data_address_p (op, mode))
551    return false;
552
553  if (CONSTANT_P (XEXP (op, 1)))
554      return true;
555
556  return false;
557}
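
/* A typical address accepted here is, illustratively,
   (plus (mult (reg r1) (const_int 4)) (reg r2)) for an SImode access,
   which can be emitted as a scaled load such as "ld.as r0,[r2,r1]".
   For HImode the scale factor must be 2, and 8-byte accesses are only
   allowed when the TARGET_LL64 double load/store instructions exist.  */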
558
/* Check for constructions like REG + OFFS, where OFFS can be a
   register, an immediate or a long immediate.  */
561
562static bool
563legitimate_offset_address_p (machine_mode mode, rtx x, bool index, bool strict)
564{
565  if (GET_CODE (x) != PLUS)
566    return false;
567
568  if (!RTX_OK_FOR_BASE_P (XEXP (x, 0), (strict)))
569    return false;
570
571  /* Check for: [Rx + small offset] or [Rx + Ry].  */
572  if (((index && RTX_OK_FOR_INDEX_P (XEXP (x, 1), (strict))
573	&& GET_MODE_SIZE ((mode)) <= 4)
574       || RTX_OK_FOR_OFFSET_P (mode, XEXP (x, 1))))
575    return true;
576
577  /* Check for [Rx + symbol].  */
578  if (!flag_pic
579      && (GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
580      /* Avoid this type of address for double or larger modes.  */
581      && (GET_MODE_SIZE (mode) <= 4)
582      /* Avoid small data which ends in something like GP +
583	 symb@sda.  */
584      && (!SYMBOL_REF_SMALL_P (XEXP (x, 1))))
585    return true;
586
587  return false;
588}
589
590/* Implements target hook vector_mode_supported_p.  */
591
592static bool
593arc_vector_mode_supported_p (machine_mode mode)
594{
595  switch (mode)
596    {
597    case E_V2HImode:
598      return TARGET_PLUS_DMPY;
599    case E_V4HImode:
600    case E_V2SImode:
601      return TARGET_PLUS_QMACW;
602    case E_V4SImode:
603    case E_V8HImode:
604      return TARGET_SIMD_SET;
605
606    default:
607      return false;
608    }
609}
610
611/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
612
613static machine_mode
614arc_preferred_simd_mode (scalar_mode mode)
615{
616  switch (mode)
617    {
618    case E_HImode:
619      return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
620    case E_SImode:
621      return V2SImode;
622
623    default:
624      return word_mode;
625    }
626}
627
628/* Implements target hook
629   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */
630
631static unsigned int
632arc_autovectorize_vector_modes (vector_modes *modes, bool)
633{
634  if (TARGET_PLUS_QMACW)
635    {
636      modes->quick_push (V4HImode);
637      modes->quick_push (V2HImode);
638    }
639  return 0;
640}
641
642
643/* Implements target hook TARGET_SCHED_ISSUE_RATE.  */
644static int
645arc_sched_issue_rate (void)
646{
647  switch (arc_tune)
648    {
649    case TUNE_ARCHS4X:
650    case TUNE_ARCHS4XD:
651      return 3;
652    default:
653      break;
654    }
655  return 1;
656}
657
658/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
659static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
660static rtx arc_delegitimize_address (rtx);
661static bool arc_can_follow_jump (const rtx_insn *follower,
662				 const rtx_insn *followee);
663
664static rtx frame_insn (rtx);
665static void arc_function_arg_advance (cumulative_args_t,
666				      const function_arg_info &);
667static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
668
/* Initialize the GCC target structure.  */
670#undef  TARGET_COMP_TYPE_ATTRIBUTES
671#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
672#undef TARGET_ASM_FILE_START
673#define TARGET_ASM_FILE_START arc_file_start
674#undef TARGET_ATTRIBUTE_TABLE
675#define TARGET_ATTRIBUTE_TABLE arc_attribute_table
676#undef TARGET_ASM_INTERNAL_LABEL
677#define TARGET_ASM_INTERNAL_LABEL arc_internal_label
678#undef TARGET_RTX_COSTS
679#define TARGET_RTX_COSTS arc_rtx_costs
680#undef TARGET_ADDRESS_COST
681#define TARGET_ADDRESS_COST arc_address_cost
682
683#undef TARGET_ENCODE_SECTION_INFO
684#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
685
686#undef TARGET_CANNOT_FORCE_CONST_MEM
687#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
688
689#undef  TARGET_INIT_BUILTINS
690#define TARGET_INIT_BUILTINS  arc_init_builtins
691
692#undef  TARGET_EXPAND_BUILTIN
693#define TARGET_EXPAND_BUILTIN arc_expand_builtin
694
695#undef  TARGET_BUILTIN_DECL
696#define TARGET_BUILTIN_DECL arc_builtin_decl
697
698#undef  TARGET_ASM_OUTPUT_MI_THUNK
699#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
700
701#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
702#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
703
704#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
705#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
706
707#undef  TARGET_MACHINE_DEPENDENT_REORG
708#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
709
710#undef TARGET_IN_SMALL_DATA_P
711#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
712
713#undef TARGET_PROMOTE_FUNCTION_MODE
714#define TARGET_PROMOTE_FUNCTION_MODE \
715  default_promote_function_mode_always_promote
716
717#undef TARGET_PROMOTE_PROTOTYPES
718#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
719
720#undef TARGET_RETURN_IN_MEMORY
721#define TARGET_RETURN_IN_MEMORY arc_return_in_memory
722#undef TARGET_PASS_BY_REFERENCE
723#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
724
725#undef TARGET_SETUP_INCOMING_VARARGS
726#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
727
728#undef TARGET_ARG_PARTIAL_BYTES
729#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
730
731#undef TARGET_MUST_PASS_IN_STACK
732#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
733
734#undef TARGET_FUNCTION_VALUE
735#define TARGET_FUNCTION_VALUE arc_function_value
736
737#undef  TARGET_SCHED_ADJUST_PRIORITY
738#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
739
740#undef TARGET_SCHED_ISSUE_RATE
741#define TARGET_SCHED_ISSUE_RATE arc_sched_issue_rate
742
743#undef TARGET_VECTOR_MODE_SUPPORTED_P
744#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
745
746#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
747#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
748
749#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
750#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES arc_autovectorize_vector_modes
751
752#undef TARGET_CAN_USE_DOLOOP_P
753#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
754
755#undef TARGET_INVALID_WITHIN_DOLOOP
756#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
757
758#undef TARGET_PRESERVE_RELOAD_P
759#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
760
761#undef TARGET_CAN_FOLLOW_JUMP
762#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
763
764#undef TARGET_DELEGITIMIZE_ADDRESS
765#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
766
767#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
768#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
769  arc_use_by_pieces_infrastructure_p
770
771/* Usually, we will be able to scale anchor offsets.
772   When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
773#undef TARGET_MIN_ANCHOR_OFFSET
774#define TARGET_MIN_ANCHOR_OFFSET (-1024)
775#undef TARGET_MAX_ANCHOR_OFFSET
776#define TARGET_MAX_ANCHOR_OFFSET (1020)
777
778#undef TARGET_SECONDARY_RELOAD
779#define TARGET_SECONDARY_RELOAD arc_secondary_reload
780
781#define TARGET_OPTION_OVERRIDE arc_override_options
782
783#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
784
785#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
786
787#define TARGET_CAN_ELIMINATE arc_can_eliminate
788
789#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
790
791#define TARGET_FUNCTION_ARG arc_function_arg
792
793#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
794
795#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p
796
797#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p
798
799#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p
800
801#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
802
803#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
804#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P	\
805  arc_no_speculation_in_delay_slots_p
806
807#undef TARGET_LRA_P
808#define TARGET_LRA_P arc_lra_p
809#define TARGET_REGISTER_PRIORITY arc_register_priority
810/* Stores with scaled offsets have different displacement ranges.  */
811#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
812#define TARGET_SPILL_CLASS arc_spill_class
813
814#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
815#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arc_allocate_stack_slots_for_args
816
817#undef TARGET_WARN_FUNC_RETURN
818#define TARGET_WARN_FUNC_RETURN arc_warn_func_return
819
820#include "target-def.h"
821
822#undef TARGET_ASM_ALIGNED_HI_OP
823#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
824#undef TARGET_ASM_ALIGNED_SI_OP
825#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
826
827#ifdef HAVE_AS_TLS
828#undef TARGET_HAVE_TLS
829#define TARGET_HAVE_TLS HAVE_AS_TLS
830#endif
831
832#undef TARGET_DWARF_REGISTER_SPAN
833#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
834
835#undef TARGET_HARD_REGNO_NREGS
836#define TARGET_HARD_REGNO_NREGS arc_hard_regno_nregs
837#undef TARGET_HARD_REGNO_MODE_OK
838#define TARGET_HARD_REGNO_MODE_OK arc_hard_regno_mode_ok
839
840#undef TARGET_MODES_TIEABLE_P
841#define TARGET_MODES_TIEABLE_P arc_modes_tieable_p
842
843/* Try to keep the (mov:DF _, reg) as early as possible so
844   that the d<add/sub/mul>h-lr insns appear together and can
845   use the peephole2 pattern.  */
846
847static int
848arc_sched_adjust_priority (rtx_insn *insn, int priority)
849{
850  rtx set = single_set (insn);
851  if (set
852      && GET_MODE (SET_SRC(set)) == DFmode
853      && GET_CODE (SET_SRC(set)) == REG)
854    {
855      /* Incrementing priority by 20 (empirically derived).  */
856      return priority + 20;
857    }
858
859  return priority;
860}
861
862/* For ARC base register + offset addressing, the validity of the
863   address is mode-dependent for most of the offset range, as the
864   offset can be scaled by the access size.
865   We don't expose these as mode-dependent addresses in the
866   mode_dependent_address_p target hook, because that would disable
867   lots of optimizations, and most uses of these addresses are for 32
868   or 64 bit accesses anyways, which are fine.
869   However, that leaves some addresses for 8 / 16 bit values not
870   properly reloaded by the generic code, which is why we have to
871   schedule secondary reloads for these.  */
872
873static reg_class_t
874arc_secondary_reload (bool in_p,
875		      rtx x,
876		      reg_class_t cl,
877		      machine_mode mode,
878		      secondary_reload_info *sri)
879{
880  enum rtx_code code = GET_CODE (x);
881
882  if (cl == DOUBLE_REGS)
883    return GENERAL_REGS;
884
885 /* If we have a subreg (reg), where reg is a pseudo (that will end in
886    a memory location), then we may need a scratch register to handle
887    the fp/sp+largeoffset address.  */
888  if (code == SUBREG)
889    {
890      rtx addr = NULL_RTX;
891      x = SUBREG_REG (x);
892
893      if (REG_P (x))
894	{
895	  int regno = REGNO (x);
896	  if (regno >= FIRST_PSEUDO_REGISTER)
897	    regno = reg_renumber[regno];
898
899	  if (regno != -1)
900	    return NO_REGS;
901
902	  /* It is a pseudo that ends in a stack location.  This
903	     procedure only works with the old reload step.  */
904	  if (reg_equiv_mem (REGNO (x)) && !lra_in_progress)
905	    {
906	      /* Get the equivalent address and check the range of the
907		 offset.  */
908	      rtx mem = reg_equiv_mem (REGNO (x));
909	      addr = find_replacement (&XEXP (mem, 0));
910	    }
911	}
912      else
913	{
914	  gcc_assert (MEM_P (x));
915	  addr = XEXP (x, 0);
916	  addr = simplify_rtx (addr);
917	}
918      if (addr && GET_CODE (addr) == PLUS
919	  && CONST_INT_P (XEXP (addr, 1))
920	  && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1))))
921	{
922	  switch (mode)
923	    {
924	    case E_QImode:
925	      sri->icode =
926		in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store;
927	      break;
928	    case E_HImode:
929	      sri->icode =
930		in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store;
931	      break;
932	    default:
933	      break;
934	    }
935	}
936    }
937  return NO_REGS;
938}
939
940/* Convert reloads using offsets that are too large to use indirect
941   addressing.  */
942
943void
944arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p)
945{
946  rtx addr;
947
948  gcc_assert (GET_CODE (mem) == MEM);
949  addr = XEXP (mem, 0);
950
  /* Large offset: use a move.  FIXME: ld ops accept limms as
     offsets.  Hence, the following move insn is not required.  */
953  emit_move_insn (scratch, addr);
954  mem = replace_equiv_address_nv (mem, scratch);
955
956  /* Now create the move.  */
957  if (store_p)
958    emit_insn (gen_rtx_SET (mem, reg));
959  else
960    emit_insn (gen_rtx_SET (reg, mem));
961
962  return;
963}
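
/* Illustrative effect of the conversion above (offsets made up): a
   reload of (mem:QI (plus (reg fp) (const_int 2000))), whose offset is
   out of range for a byte access, becomes roughly

     add  r12,fp,2000    ; scratch <- fp + 2000
     ldb  r0,[r12]       ; or stb when store_p is true

   where r12 stands for the scratch register supplied by the reload
   pattern.  */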
964
965static unsigned arc_ifcvt (void);
966
967namespace {
968
969const pass_data pass_data_arc_ifcvt =
970{
971  RTL_PASS,
972  "arc_ifcvt",				/* name */
973  OPTGROUP_NONE,			/* optinfo_flags */
974  TV_IFCVT2,				/* tv_id */
975  0,					/* properties_required */
976  0,					/* properties_provided */
977  0,					/* properties_destroyed */
978  0,					/* todo_flags_start */
979  TODO_df_finish			/* todo_flags_finish */
980};
981
982class pass_arc_ifcvt : public rtl_opt_pass
983{
984 public:
985 pass_arc_ifcvt (gcc::context *ctxt)
986   : rtl_opt_pass (pass_data_arc_ifcvt, ctxt)
987    {}
988
989  /* opt_pass methods: */
990  opt_pass * clone ()
991    {
992      return new pass_arc_ifcvt (m_ctxt);
993    }
994  virtual unsigned int execute (function *)
995  {
996    return arc_ifcvt ();
997  }
998  virtual bool gate (function *)
999  {
1000    return (optimize > 1 && !TARGET_NO_COND_EXEC);
1001  }
1002};
1003
1004} // anon namespace
1005
1006rtl_opt_pass *
1007make_pass_arc_ifcvt (gcc::context *ctxt)
1008{
1009  return new pass_arc_ifcvt (ctxt);
1010}
1011
1012static unsigned arc_predicate_delay_insns (void);
1013
1014namespace {
1015
1016const pass_data pass_data_arc_predicate_delay_insns =
1017{
1018  RTL_PASS,
1019  "arc_predicate_delay_insns",		/* name */
1020  OPTGROUP_NONE,			/* optinfo_flags */
1021  TV_IFCVT2,				/* tv_id */
1022  0,					/* properties_required */
1023  0,					/* properties_provided */
1024  0,					/* properties_destroyed */
1025  0,					/* todo_flags_start */
1026  TODO_df_finish			/* todo_flags_finish */
1027};
1028
1029class pass_arc_predicate_delay_insns : public rtl_opt_pass
1030{
1031 public:
1032 pass_arc_predicate_delay_insns(gcc::context *ctxt)
1033   : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt)
1034    {}
1035
1036  /* opt_pass methods: */
1037  virtual unsigned int execute (function *)
1038  {
1039    return arc_predicate_delay_insns ();
1040  }
1041  virtual bool gate (function *)
1042  {
1043    return flag_delayed_branch;
1044  }
1045};
1046
1047} // anon namespace
1048
1049rtl_opt_pass *
1050make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
1051{
1052  return new pass_arc_predicate_delay_insns (ctxt);
1053}
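
/* These passes only run if they are registered with the pass manager;
   a minimal sketch of such a registration (the reference pass name and
   instance number below are only examples) is:

     struct register_pass_info arc_ifcvt_info
       = { make_pass_arc_ifcvt (g), "dbr", 1, PASS_POS_INSERT_AFTER };
     register_pass (&arc_ifcvt_info);  */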
1054
1055/* Called by OVERRIDE_OPTIONS to initialize various things.  */
1056
1057static void
1058arc_init (void)
1059{
1060  if (TARGET_V2)
1061    {
      /* If we have the multiplier, then use it.  */
1063      if (TARGET_MPYW || TARGET_MULTI)
1064	  arc_multcost = COSTS_N_INSNS (1);
1065    }
1066  /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
1067  if (arc_multcost < 0)
1068    switch (arc_tune)
1069      {
1070      case ARC_TUNE_ARC700_4_2_STD:
1071	/* latency 7;
1072	   max throughput (1 multiply + 4 other insns) / 5 cycles.  */
1073	arc_multcost = COSTS_N_INSNS (4);
1074	if (TARGET_NOMPY_SET)
1075	  arc_multcost = COSTS_N_INSNS (30);
1076	break;
1077      case ARC_TUNE_ARC700_4_2_XMAC:
1078	/* latency 5;
1079	   max throughput (1 multiply + 2 other insns) / 3 cycles.  */
1080	arc_multcost = COSTS_N_INSNS (3);
1081	if (TARGET_NOMPY_SET)
1082	  arc_multcost = COSTS_N_INSNS (30);
1083	break;
1084      case ARC_TUNE_ARC600:
1085	if (TARGET_MUL64_SET)
1086	  {
1087	    arc_multcost = COSTS_N_INSNS (4);
1088	    break;
1089	  }
1090	/* Fall through.  */
1091      default:
1092	arc_multcost = COSTS_N_INSNS (30);
1093	break;
1094      }
1095
1096  /* MPY instructions valid only for ARC700 or ARCv2.  */
1097  if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
1098      error ("%<-mno-mpy%> supported only for ARC700 or ARCv2");
1099
1100  if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
1101      error ("%<-mno-dpfp-lrsr%> supported only with %<-mdpfp%>");
1102
1103  /* FPX-1. No fast and compact together.  */
1104  if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
1105      || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
1106    error ("FPX fast and compact options cannot be specified together");
1107
1108  /* FPX-2. No fast-spfp for arc600 or arc601.  */
1109  if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
1110    error ("%<-mspfp_fast%> not available on ARC600 or ARC601");
1111
1112  /* FPX-4.  No FPX extensions mixed with FPU extensions.  */
1113  if ((TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET || TARGET_SPFP)
1114      && TARGET_HARD_FLOAT)
1115    error ("no FPX/FPU mixing allowed");
1116
1117  /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
1118  if (flag_pic && TARGET_ARC600_FAMILY)
1119    {
1120      warning (0, "PIC is not supported for %qs",
1121	       arc_cpu_string);
1122      flag_pic = 0;
1123    }
1124
1125  arc_init_reg_tables ();
1126
1127  /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
1128  memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
1129  arc_punct_chars['#'] = 1;
1130  arc_punct_chars['*'] = 1;
1131  arc_punct_chars['?'] = 1;
1132  arc_punct_chars['!'] = 1;
1133  arc_punct_chars['^'] = 1;
1134  arc_punct_chars['&'] = 1;
1135  arc_punct_chars['+'] = 1;
1136  arc_punct_chars['_'] = 1;
1137}
1138
/* Parse -mirq-ctrl-saved=RegisterRange, blink, lp_count.  The
   register range is specified as two registers separated by a dash.
   It always starts with r0, and its upper limit is the fp register.
   blink and lp_count registers are optional.  */
1143
1144static void
1145irq_range (const char *cstr)
1146{
1147  int i, first, last, blink, lpcount, xreg;
1148  char *str, *dash, *comma;
1149
1150  i = strlen (cstr);
1151  str = (char *) alloca (i + 1);
1152  memcpy (str, cstr, i + 1);
1153  blink = -1;
1154  lpcount = -1;
1155
1156  dash = strchr (str, '-');
1157  if (!dash)
1158    {
1159      warning (OPT_mirq_ctrl_saved_, "missing dash");
1160      return;
1161    }
1162  *dash = '\0';
1163
1164  comma = strchr (dash + 1, ',');
1165  if (comma)
1166    *comma = '\0';
1167
1168  first = decode_reg_name (str);
1169  if (first != 0)
1170    {
1171      warning (OPT_mirq_ctrl_saved_, "first register must be R0");
1172      return;
1173    }
1174
1175  /* At this moment we do not have the register names initialized
1176     accordingly.  */
1177  if (!strcmp (dash + 1, "ilink"))
1178    last = 29;
1179  else
1180    last = decode_reg_name (dash + 1);
1181
1182  if (last < 0)
1183    {
1184      warning (OPT_mirq_ctrl_saved_, "unknown register name: %s", dash + 1);
1185      return;
1186    }
1187
1188  if (!(last & 0x01))
1189    {
1190      warning (OPT_mirq_ctrl_saved_,
1191	       "last register name %s must be an odd register", dash + 1);
1192      return;
1193    }
1194
1195  *dash = '-';
1196
1197  if (first > last)
1198    {
1199      warning (OPT_mirq_ctrl_saved_,
1200	       "%s-%s is an empty range", str, dash + 1);
1201      return;
1202    }
1203
1204  while (comma)
1205    {
1206      *comma = ',';
1207      str = comma + 1;
1208
1209      comma = strchr (str, ',');
1210      if (comma)
1211	*comma = '\0';
1212
1213      xreg = decode_reg_name (str);
1214      switch (xreg)
1215	{
1216	case 31:
1217	  blink = 31;
1218	  break;
1219
1220	case 60:
1221	  lpcount = 60;
1222	  break;
1223
1224	default:
1225	  warning (OPT_mirq_ctrl_saved_,
1226		   "unknown register name: %s", str);
1227	  return;
1228	}
1229    }
1230
1231  irq_ctrl_saved.irq_save_last_reg = last;
1232  irq_ctrl_saved.irq_save_blink    = (blink == 31) || (last == 31);
1233  irq_ctrl_saved.irq_save_lpcount  = (lpcount == 60);
1234}
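
/* Example option strings accepted by the parser above:
   "-mirq-ctrl-saved=r0-r11" and
   "-mirq-ctrl-saved=r0-ilink,blink,lp_count".  The first records that
   r0-r11 are saved automatically on interrupt entry; the second
   additionally records that BLINK and LP_COUNT are saved.  */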
1235
/* Parse the -mrgf-banked-regs=NUM option string.  Valid values for NUM
   are 0, 4, 8, 16, or 32.  */
1238
1239static void
1240parse_mrgf_banked_regs_option (const char *arg)
1241{
1242  long int val;
1243  char *end_ptr;
1244
1245  errno = 0;
1246  val = strtol (arg, &end_ptr, 10);
1247  if (errno != 0 || *arg == '\0' || *end_ptr != '\0'
1248      || (val != 0 && val != 4 && val != 8 && val != 16 && val != 32))
1249    {
1250      error ("invalid number in %<-mrgf-banked-regs=%s%> "
1251	     "valid values are 0, 4, 8, 16, or 32", arg);
1252      return;
1253    }
1254  rgf_banked_register_count = (int) val;
1255}
1256
1257/* Check ARC options, generate derived target attributes.  */
1258
1259static void
1260arc_override_options (void)
1261{
1262  unsigned int i;
1263  cl_deferred_option *opt;
1264  vec<cl_deferred_option> *vopt
1265    = (vec<cl_deferred_option> *) arc_deferred_options;
1266
1267  if (arc_cpu == PROCESSOR_NONE)
1268    arc_cpu = TARGET_CPU_DEFAULT;
1269
1270  /* Set the default cpu options.  */
1271  arc_selected_cpu = &arc_cpu_types[(int) arc_cpu];
1272
1273  /* Set the architectures.  */
1274  switch (arc_selected_cpu->arch_info->arch_id)
1275    {
1276    case BASE_ARCH_em:
1277      arc_cpu_string = "EM";
1278      break;
1279    case BASE_ARCH_hs:
1280      arc_cpu_string = "HS";
1281      break;
1282    case BASE_ARCH_700:
1283      if (arc_selected_cpu->processor == PROCESSOR_nps400)
1284	arc_cpu_string = "NPS400";
1285      else
1286	arc_cpu_string = "ARC700";
1287      break;
1288    case BASE_ARCH_6xx:
1289      arc_cpu_string = "ARC600";
1290      break;
1291    default:
1292      gcc_unreachable ();
1293    }
1294
1295  irq_ctrl_saved.irq_save_last_reg = -1;
1296  irq_ctrl_saved.irq_save_blink    = false;
1297  irq_ctrl_saved.irq_save_lpcount  = false;
1298
1299  rgf_banked_register_count = 0;
1300
1301  /* Handle the deferred options.  */
1302  if (vopt)
1303    FOR_EACH_VEC_ELT (*vopt, i, opt)
1304      {
1305	switch (opt->opt_index)
1306	  {
1307	  case OPT_mirq_ctrl_saved_:
1308	    if (TARGET_V2)
1309	      irq_range (opt->arg);
1310	    else
1311	      warning (OPT_mirq_ctrl_saved_,
1312		       "option %<-mirq-ctrl-saved%> valid only "
1313		       "for ARC v2 processors");
1314	    break;
1315
1316	  case OPT_mrgf_banked_regs_:
1317	    if (TARGET_V2)
1318	      parse_mrgf_banked_regs_option (opt->arg);
1319	    else
1320	      warning (OPT_mrgf_banked_regs_,
1321		       "option %<-mrgf-banked-regs%> valid only for "
1322		       "ARC v2 processors");
1323	    break;
1324
1325	  default:
1326	    gcc_unreachable();
1327	  }
1328      }
1329
1330  CLEAR_HARD_REG_SET (overrideregs);
1331  if (common_deferred_options)
1332    {
1333      vec<cl_deferred_option> v =
1334	*((vec<cl_deferred_option> *) common_deferred_options);
1335      int reg, nregs, j;
1336
1337      FOR_EACH_VEC_ELT (v, i, opt)
1338	{
1339	  switch (opt->opt_index)
1340	    {
1341	    case OPT_ffixed_:
1342	    case OPT_fcall_used_:
1343	    case OPT_fcall_saved_:
1344	      if ((reg = decode_reg_name_and_count (opt->arg, &nregs)) >= 0)
1345		for (j = reg;  j < reg + nregs; j++)
1346		  SET_HARD_REG_BIT (overrideregs, j);
1347	      break;
1348	    default:
1349	      break;
1350	    }
1351	}
1352    }
1353
  /* Check options against architecture options.  Throw an error if an
     option is not allowed.  Additionally, check options against the
     default architecture/cpu flags and issue a warning if we find a
     mismatch.  */
1358  /* TRANSLATORS: the DOC/DOC0/DOC1 are strings which shouldn't be
1359     translated.  They are like keywords which one can relate with the
1360     architectural choices taken for an ARC CPU implementation.  */
1361#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC0, DOC1)		\
1362  do {								\
1363    if ((!(arc_selected_cpu->arch_info->flags & CODE))		\
1364	&& (VAR == VAL))					\
1365      error ("option %<%s=%s%> is not available for %qs CPU",	\
1366	     DOC0, DOC1, arc_selected_cpu->name);		\
1367    if ((arc_selected_cpu->arch_info->dflags & CODE)		\
1368	&& (VAR != DEFAULT_##VAR)				\
1369	&& (VAR != VAL))					\
1370      warning (0, "option %qs is ignored, the default value %qs"	\
1371	       " is considered for %qs CPU", DOC0, DOC1,		\
1372	       arc_selected_cpu->name);				\
1373 } while (0);
1374#define ARC_OPT(NAME, CODE, MASK, DOC)				\
1375  do {								\
1376    if ((!(arc_selected_cpu->arch_info->flags & CODE))		\
1377	&& (target_flags & MASK))				\
1378      error ("option %qs is not available for %qs CPU",		\
1379	     DOC, arc_selected_cpu->name);			\
1380    if ((arc_selected_cpu->arch_info->dflags & CODE)		\
1381	&& (target_flags_explicit & MASK)			\
1382	&& (!(target_flags & MASK)))				\
1383      warning (0, "unset option %qs is ignored, it is always"	\
1384	       " enabled for %qs CPU", DOC,			\
1385	       arc_selected_cpu->name);				\
1386  } while (0);
1387
1388#include "arc-options.def"
1389
1390#undef ARC_OPTX
1391#undef ARC_OPT
1392
  /* Set cpu flags according to the architecture/selected cpu.  The cpu
     specific flags are set in arc-common.c.  The architecture forces
     the default hardware configurations in, regardless of what the
     command line options are saying.  The CPU's optional hw options can
     be turned on or off.  */
1398#define ARC_OPT(NAME, CODE, MASK, DOC)			\
1399  do {							\
1400    if ((arc_selected_cpu->flags & CODE)		\
1401	&& ((target_flags_explicit & MASK) == 0))	\
1402      target_flags |= MASK;				\
1403    if (arc_selected_cpu->arch_info->dflags & CODE)	\
1404      target_flags |= MASK;				\
1405  } while (0);
1406#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC0, DOC1)	\
1407  do {							\
1408    if ((arc_selected_cpu->flags & CODE)		\
1409	&& (VAR == DEFAULT_##VAR))			\
1410      VAR = VAL;					\
1411    if (arc_selected_cpu->arch_info->dflags & CODE)	\
1412      VAR = VAL;					\
1413  } while (0);
1414
1415#include "arc-options.def"
1416
1417#undef ARC_OPTX
1418#undef ARC_OPT
1419
1420  /* Set extras.  */
1421  switch (arc_selected_cpu->extra)
1422    {
1423    case HAS_LPCOUNT_16:
1424      arc_lpcwidth = 16;
1425      break;
1426    default:
1427      break;
1428    }
1429
1430  /* Set Tune option.  */
1431  if (arc_tune == ARC_TUNE_NONE)
1432    arc_tune = (enum arc_tune_attr) arc_selected_cpu->tune;
1433
1434  if (arc_size_opt_level == 3)
1435    optimize_size = 1;
1436
1437  if (TARGET_V2 && optimize_size && (ATTRIBUTE_PCS == 2))
1438    TARGET_CODE_DENSITY_FRAME = 1;
1439
1440  if (flag_pic)
1441    target_flags |= MASK_NO_SDATA_SET;
1442
1443  if (flag_no_common == 255)
1444    flag_no_common = !TARGET_NO_SDATA_SET;
1445
1446  /* Check for small data option */
1447  if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET)
1448    g_switch_value = TARGET_LL64 ? 8 : 4;
1449
1450  /* A7 has an issue with delay slots.  */
1451  if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
1452    flag_delayed_branch = 0;
1453
  /* Millicode thunks don't work with long calls.  */
1455  if (TARGET_LONG_CALLS_SET)
1456    target_flags &= ~MASK_MILLICODE_THUNK_SET;
1457
  /* Enable unaligned access for all HS cpus.  */
1459  if (!global_options_set.x_unaligned_access && TARGET_HS)
1460    unaligned_access = 1;
1461
1462  /* These need to be done at start up.  It's convenient to do them here.  */
1463  arc_init ();
1464}
1465
1466/* The condition codes of the ARC, and the inverse function.  */
/* For short branches, the "c" / "nc" names are not defined in the ARC
   Programmer's manual, so we have to use "lo" / "hs" instead.  */
1469static const char *arc_condition_codes[] =
1470{
1471  "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
1472  "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
1473};
1474
1475enum arc_cc_code_index
1476{
1477  ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
1478  ARC_CC_C,  ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
1479  ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
1480  ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
1481};
1482
1483#define ARC_INVERSE_CONDITION_CODE(X)  ((X) ^ 1)
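
/* Each condition and its inverse occupy adjacent even/odd slots of
   arc_condition_codes, so flipping bit 0 inverts the condition; for
   example ARC_CC_EQ (2) ^ 1 == ARC_CC_NE (3), and
   ARC_CC_LO (6) ^ 1 == ARC_CC_HS (7).  */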
1484
1485/* Returns the index of the ARC condition code string in
1486   `arc_condition_codes'.  COMPARISON should be an rtx like
1487   `(eq (...) (...))'.  */
1488
1489static int
1490get_arc_condition_code (rtx comparison)
1491{
1492  switch (GET_MODE (XEXP (comparison, 0)))
1493    {
1494    case E_CCmode:
1495    case E_SImode: /* For BRcc.  */
1496      switch (GET_CODE (comparison))
1497	{
1498	case EQ : return ARC_CC_EQ;
1499	case NE : return ARC_CC_NE;
1500	case GT : return ARC_CC_GT;
1501	case LE : return ARC_CC_LE;
1502	case GE : return ARC_CC_GE;
1503	case LT : return ARC_CC_LT;
1504	case GTU : return ARC_CC_HI;
1505	case LEU : return ARC_CC_LS;
1506	case LTU : return ARC_CC_LO;
1507	case GEU : return ARC_CC_HS;
1508	default : gcc_unreachable ();
1509	}
1510    case E_CC_ZNmode:
1511      switch (GET_CODE (comparison))
1512	{
1513	case EQ : return ARC_CC_EQ;
1514	case NE : return ARC_CC_NE;
1515	case GE: return ARC_CC_P;
1516	case LT: return ARC_CC_N;
1517	case GT : return ARC_CC_PNZ;
1518	default : gcc_unreachable ();
1519	}
1520    case E_CC_Zmode:
1521      switch (GET_CODE (comparison))
1522	{
1523	case EQ : return ARC_CC_EQ;
1524	case NE : return ARC_CC_NE;
1525	default : gcc_unreachable ();
1526	}
1527    case E_CC_Cmode:
1528      switch (GET_CODE (comparison))
1529	{
1530	case LTU : return ARC_CC_C;
1531	case GEU : return ARC_CC_NC;
1532	default : gcc_unreachable ();
1533	}
1534    case E_CC_FP_GTmode:
1535      if (TARGET_ARGONAUT_SET && TARGET_SPFP)
1536	switch (GET_CODE (comparison))
1537	  {
1538	  case GT  : return ARC_CC_N;
1539	  case UNLE: return ARC_CC_P;
1540	  default : gcc_unreachable ();
1541	}
1542      else
1543	switch (GET_CODE (comparison))
1544	  {
1545	  case GT   : return ARC_CC_HI;
1546	  case UNLE : return ARC_CC_LS;
1547	  default : gcc_unreachable ();
1548	}
1549    case E_CC_FP_GEmode:
1550      /* Same for FPX and non-FPX.  */
1551      switch (GET_CODE (comparison))
1552	{
1553	case GE   : return ARC_CC_HS;
1554	case UNLT : return ARC_CC_LO;
1555	default : gcc_unreachable ();
1556	}
1557    case E_CC_FP_UNEQmode:
1558      switch (GET_CODE (comparison))
1559	{
1560	case UNEQ : return ARC_CC_EQ;
1561	case LTGT : return ARC_CC_NE;
1562	default : gcc_unreachable ();
1563	}
1564    case E_CC_FP_ORDmode:
1565      switch (GET_CODE (comparison))
1566	{
1567	case UNORDERED : return ARC_CC_C;
1568	case ORDERED   : return ARC_CC_NC;
1569	default : gcc_unreachable ();
1570	}
1571    case E_CC_FPXmode:
1572      switch (GET_CODE (comparison))
1573	{
1574	case EQ        : return ARC_CC_EQ;
1575	case NE        : return ARC_CC_NE;
1576	case UNORDERED : return ARC_CC_C;
1577	case ORDERED   : return ARC_CC_NC;
1578	case LTGT      : return ARC_CC_HI;
1579	case UNEQ      : return ARC_CC_LS;
1580	default : gcc_unreachable ();
1581	}
1582    case E_CC_FPUmode:
1583    case E_CC_FPUEmode:
1584      switch (GET_CODE (comparison))
1585	{
1586	case EQ	       : return ARC_CC_EQ;
1587	case NE	       : return ARC_CC_NE;
1588	case GT	       : return ARC_CC_GT;
1589	case GE	       : return ARC_CC_GE;
1590	case LT	       : return ARC_CC_C;
1591	case LE	       : return ARC_CC_LS;
1592	case UNORDERED : return ARC_CC_V;
1593	case ORDERED   : return ARC_CC_NV;
1594	case UNGT      : return ARC_CC_HI;
1595	case UNGE      : return ARC_CC_HS;
1596	case UNLT      : return ARC_CC_LT;
1597	case UNLE      : return ARC_CC_LE;
1598	  /* UNEQ and LTGT do not have representation.  */
1599	case LTGT      : /* Fall through.  */
1600	case UNEQ      : /* Fall through.  */
1601	default : gcc_unreachable ();
1602	}
1603    case E_CC_FPU_UNEQmode:
1604      switch (GET_CODE (comparison))
1605	{
1606	case LTGT : return ARC_CC_NE;
1607	case UNEQ : return ARC_CC_EQ;
1608	default : gcc_unreachable ();
1609	}
1610    default : gcc_unreachable ();
1611    }
1612  /*NOTREACHED*/
1613  return (42);
1614}
1615
/* Return true if COMPARISON has a short form that can accommodate OFFSET.  */
1617
1618bool
1619arc_short_comparison_p (rtx comparison, int offset)
1620{
1621  gcc_assert (ARC_CC_NC == ARC_CC_HS);
1622  gcc_assert (ARC_CC_C == ARC_CC_LO);
1623  switch (get_arc_condition_code (comparison))
1624    {
1625    case ARC_CC_EQ: case ARC_CC_NE:
1626      return offset >= -512 && offset <= 506;
1627    case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
1628    case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
1629      return offset >= -64 && offset <= 58;
1630    default:
1631      return false;
1632    }
1633}
1634
1635/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1636   return the mode to be used for the comparison.  */
1637
1638machine_mode
1639arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
1640{
1641  machine_mode mode = GET_MODE (x);
1642  rtx x1;
1643
1644  /* For an operation that sets the condition codes as a side-effect, the
     C and V flags are not set as for cmp, so we can only use comparisons where
1646     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
1647     instead.)  */
1648  /* ??? We could use "pnz" for greater than zero, however, we could then
1649     get into trouble because the comparison could not be reversed.  */
1650  if (GET_MODE_CLASS (mode) == MODE_INT
1651      && y == const0_rtx
1652      && (op == EQ || op == NE
1653	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
1654    return CC_ZNmode;
1655
1656  /* add.f for if (a+b) */
1657  if (mode == SImode
1658      && GET_CODE (y) == NEG
1659      && (op == EQ || op == NE))
1660    return CC_ZNmode;
1661
1662  /* Check if this is a test suitable for bxor.f .  */
1663  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1664      && ((INTVAL (y) - 1) & INTVAL (y)) == 0
1665      && INTVAL (y))
1666    return CC_Zmode;
1667
1668  /* Check if this is a test suitable for add / bmsk.f .  */
1669  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1670      && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
1671      && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
1672      && (~INTVAL (x1) | INTVAL (y)) < 0
1673      && (~INTVAL (x1) | INTVAL (y)) > -0x800)
1674    return CC_Zmode;
1675
1676  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
1677      && GET_CODE (x) == PLUS
1678      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
1679    return CC_Cmode;
1680
1681  if (TARGET_ARGONAUT_SET
1682      && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
1683    switch (op)
1684      {
1685      case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1686	return CC_FPXmode;
1687      case LT: case UNGE: case GT: case UNLE:
1688	return CC_FP_GTmode;
1689      case LE: case UNGT: case GE: case UNLT:
1690	return CC_FP_GEmode;
1691      default: gcc_unreachable ();
1692      }
1693  else if (TARGET_HARD_FLOAT
1694	   && ((mode == SFmode && TARGET_FP_SP_BASE)
1695	       || (mode == DFmode && TARGET_FP_DP_BASE)))
1696    switch (op)
1697      {
1698      case EQ:
1699      case NE:
1700      case UNORDERED:
1701      case ORDERED:
1702      case UNLT:
1703      case UNLE:
1704      case UNGT:
1705      case UNGE:
1706	return CC_FPUmode;
1707
1708      case LT:
1709      case LE:
1710      case GT:
1711      case GE:
1712	return CC_FPUEmode;
1713
1714      case LTGT:
1715      case UNEQ:
1716	return CC_FPU_UNEQmode;
1717
1718      default:
1719	gcc_unreachable ();
1720      }
1721  else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
1722    {
1723      switch (op)
1724	{
1725	case EQ: case NE: return CC_Zmode;
1726	case LT: case UNGE:
1727	case GT: case UNLE: return CC_FP_GTmode;
1728	case LE: case UNGT:
1729	case GE: case UNLT: return CC_FP_GEmode;
1730	case UNEQ: case LTGT: return CC_FP_UNEQmode;
1731	case ORDERED: case UNORDERED: return CC_FP_ORDmode;
1732	default: gcc_unreachable ();
1733	}
1734    }
1735  return CCmode;
1736}
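
/* As a rough illustration of the selection above: comparing a register
   against zero with EQ/NE (or with LT/GE on a value no wider than 32
   bits) yields CC_ZNmode, comparing it against a power of two such as 8
   with EQ/NE yields CC_Zmode (suitable for bxor.f), and a generic signed
   comparison of two registers falls back to CCmode.  */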
1737
1738/* Vectors to keep interesting information about registers where it can easily
1739   be got.  We used to use the actual mode value as the bit number, but there
1740   is (or may be) more than 32 modes now.  Instead we use two tables: one
1741   indexed by hard register number, and one indexed by mode.  */
1742
1743/* The purpose of arc_mode_class is to shrink the range of modes so that
1744   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
1745   mapped into one arc_mode_class mode.  */
1746
1747enum arc_mode_class {
1748  C_MODE,
1749  S_MODE, D_MODE, T_MODE, O_MODE,
1750  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
1751  V_MODE
1752};
1753
1754/* Modes for condition codes.  */
1755#define C_MODES (1 << (int) C_MODE)
1756
1757/* Modes for single-word and smaller quantities.  */
1758#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
1759
1760/* Modes for double-word and smaller quantities.  */
1761#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
1762
1763/* Mode for 8-byte DF values only.  */
1764#define DF_MODES (1 << DF_MODE)
1765
1766/* Modes for quad-word and smaller quantities.  */
1767#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
1768
1769/* Modes for 128-bit vectors.  */
1770#define V_MODES (1 << (int) V_MODE)
1771
1772/* Value is 1 if register/mode pair is acceptable on arc.  */
1773
1774static unsigned int arc_hard_regno_modes[] = {
1775  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1776  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1777  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
1778  D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1779
1780  /* ??? Leave these as S_MODES for now.  */
1781  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1782  DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
1783  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1784  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
1785
1786  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1787  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1788  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1789  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1790
1791  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1792  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1793  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1794  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1795
1796  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1797  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1798  S_MODES, S_MODES
1799};
1800
1801static unsigned int arc_mode_class [NUM_MACHINE_MODES];
1802
1803enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
1804
1805enum reg_class
1806arc_preferred_reload_class (rtx, enum reg_class cl)
1807{
1808  return cl;
1809}
1810
1811/* Initialize the arc_mode_class array.  */
1812
1813static void
1814arc_init_reg_tables (void)
1815{
1816  int i;
1817
1818  for (i = 0; i < NUM_MACHINE_MODES; i++)
1819    {
1820      machine_mode m = (machine_mode) i;
1821
1822      switch (GET_MODE_CLASS (m))
1823	{
1824	case MODE_INT:
1825	case MODE_PARTIAL_INT:
1826	case MODE_COMPLEX_INT:
1827	  if (GET_MODE_SIZE (m) <= 4)
1828	    arc_mode_class[i] = 1 << (int) S_MODE;
1829	  else if (GET_MODE_SIZE (m) == 8)
1830	    arc_mode_class[i] = 1 << (int) D_MODE;
1831	  else if (GET_MODE_SIZE (m) == 16)
1832	    arc_mode_class[i] = 1 << (int) T_MODE;
1833	  else if (GET_MODE_SIZE (m) == 32)
1834	    arc_mode_class[i] = 1 << (int) O_MODE;
1835	  else
1836	    arc_mode_class[i] = 0;
1837	  break;
1838	case MODE_FLOAT:
1839	case MODE_COMPLEX_FLOAT:
1840	  if (GET_MODE_SIZE (m) <= 4)
1841	    arc_mode_class[i] = 1 << (int) SF_MODE;
1842	  else if (GET_MODE_SIZE (m) == 8)
1843	    arc_mode_class[i] = 1 << (int) DF_MODE;
1844	  else if (GET_MODE_SIZE (m) == 16)
1845	    arc_mode_class[i] = 1 << (int) TF_MODE;
1846	  else if (GET_MODE_SIZE (m) == 32)
1847	    arc_mode_class[i] = 1 << (int) OF_MODE;
1848	  else
1849	    arc_mode_class[i] = 0;
1850	  break;
1851	case MODE_VECTOR_INT:
1852	  if (GET_MODE_SIZE (m) == 4)
1853	    arc_mode_class[i] = (1 << (int) S_MODE);
1854	  else if (GET_MODE_SIZE (m) == 8)
1855	    arc_mode_class[i] = (1 << (int) D_MODE);
1856	  else
1857	    arc_mode_class[i] = (1 << (int) V_MODE);
1858	  break;
1859	case MODE_CC:
1860	default:
1861	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
1862	     we must explicitly check for them here.  */
1863	  if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
1864	      || i == (int) CC_Cmode
1865	      || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode
1866	      || i == CC_FPUmode || i == CC_FPUEmode || i == CC_FPU_UNEQmode)
1867	    arc_mode_class[i] = 1 << (int) C_MODE;
1868	  else
1869	    arc_mode_class[i] = 0;
1870	  break;
1871	}
1872    }
1873}
1874
1875/* Core registers 56..59 are used for multiply extension options.
1876   The dsp option uses r56 and r57; these are then named acc1 and acc2.
1877   acc1 is the highpart, and acc2 the lowpart, so which register gets which
1878   number depends on endianness.
1879   The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
1880   Because mlo / mhi form a 64 bit value, we use different gcc internal
1881   register numbers to make them form a register pair as the gcc internals
1882   know it.  mmid gets number 57, if still available, and mlo / mhi get
1883   number 58 and 59, depending on endianness.  We use DBX_REGISTER_NUMBER
1884   to map this back.  */
1885  char rname56[5] = "r56";
1886  char rname57[5] = "r57";
1887  char rname58[5] = "r58";
1888  char rname59[5] = "r59";
1889  char rname29[7] = "ilink1";
1890  char rname30[7] = "ilink2";
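
/* For example, with the 32x16 multiply-accumulate option on a
   little-endian target the code below names r56 "acc2" (the low part)
   and r57 "acc1" (the high part), and swaps the two names on big-endian,
   so that the r56/r57 pair always reads as a normal register pair to the
   rest of the compiler.  */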
1891
1892static void
1893arc_conditional_register_usage (void)
1894{
1895  int regno;
1896  int i;
1897  int fix_start = 60, fix_end = 55;
1898
1899  if (TARGET_V2)
1900    {
1901      /* For ARCv2 the core register set is changed.  */
1902      strcpy (rname29, "ilink");
1903      strcpy (rname30, "r30");
1904
1905      if (!TEST_HARD_REG_BIT (overrideregs, R30_REG))
1906	{
1907	  /* No user interference.  Set the r30 to be used by the
1908	     compiler.  */
1909	  call_used_regs[R30_REG] = 1;
1910	  fixed_regs[R30_REG] = 0;
1911
1912	  arc_regno_reg_class[R30_REG] = GENERAL_REGS;
1913	}
1914   }
1915
1916  if (TARGET_MUL64_SET)
1917    {
1918      fix_start = R57_REG;
1919      fix_end = R59_REG;
1920
1921      /* We don't provide a name for mmid.  In rtl / assembly resource lists,
1922	 you are supposed to refer to it as mlo & mhi, e.g.
1923	 (zero_extract:SI (reg:DI 58) (const_int 32) (const_int 16)).
1924	 In an actual asm instruction, you would of course use mmid.
1925	 The point of avoiding a separate register for mmid is that this way,
1926	 we don't have to carry clobbers of that reg around in every
1927	 instruction that modifies mlo and/or mhi.  */
1928      strcpy (rname57, "");
1929      strcpy (rname58, "mlo");
1930      strcpy (rname59, "mhi");
1931    }
1932
1933  /* The nature of arc_tp_regno is actually something more like a global
1934     register, however globalize_reg requires a declaration.
1935     We use EPILOGUE_USES to compensate so that sets from
1936     __builtin_set_frame_pointer are not deleted.  */
1937  if (arc_tp_regno != -1)
1938    fixed_regs[arc_tp_regno] = call_used_regs[arc_tp_regno] = 1;
1939
1940  if (TARGET_MULMAC_32BY16_SET)
1941    {
1942      fix_start = MUL32x16_REG;
1943      fix_end = fix_end > R57_REG ? fix_end : R57_REG;
1944      strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
1945      strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
1946    }
1947  for (regno = fix_start; regno <= fix_end; regno++)
1948    {
1949      if (!fixed_regs[regno])
1950	warning (0, "multiply option implies r%d is fixed", regno);
1951      fixed_regs [regno] = call_used_regs[regno] = 1;
1952    }
1953
1954  /* Reduced configuration: don't use r4-r9, r16-r25.  */
1955  if (TARGET_RF16)
1956    {
1957      for (i = R4_REG; i <= R9_REG; i++)
1958	fixed_regs[i] = call_used_regs[i] = 1;
1959      for (i = R16_REG; i <= R25_REG; i++)
1960	fixed_regs[i] = call_used_regs[i] = 1;
1961    }
1962
1963  /* ARCHS has 64-bit data-path which makes use of the even-odd paired
1964     registers.  */
1965  if (TARGET_HS)
1966    for (regno = R1_REG; regno < R32_REG; regno +=2)
1967      arc_hard_regno_modes[regno] = S_MODES;
1968
1969  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1970    if (i < ILINK1_REG)
1971      {
1972	if ((i <= R3_REG) || ((i >= R12_REG) && (i <= R15_REG)))
1973	  arc_regno_reg_class[i] = ARCOMPACT16_REGS;
1974	else
1975	  arc_regno_reg_class[i] = GENERAL_REGS;
1976      }
1977    else if (i < LP_COUNT)
1978      arc_regno_reg_class[i] = GENERAL_REGS;
1979    else
1980      arc_regno_reg_class[i] = NO_REGS;
1981
1982  /* Handle Special Registers.  */
1983  arc_regno_reg_class[CC_REG] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
1984  arc_regno_reg_class[FRAME_POINTER_REGNUM] = GENERAL_REGS;
1985  arc_regno_reg_class[ARG_POINTER_REGNUM] = GENERAL_REGS;
1986
1987  if (TARGET_DPFP)
1988    for (i = R40_REG; i < R44_REG; ++i)
1989      {
1990	arc_regno_reg_class[i] = DOUBLE_REGS;
1991	if (!TARGET_ARGONAUT_SET)
1992	  CLEAR_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i);
1993      }
1994  else
1995    {
1996      /* Disable all DOUBLE_REGISTER settings, if not generating DPFP
1997	 code.  */
1998      arc_regno_reg_class[R40_REG] = ALL_REGS;
1999      arc_regno_reg_class[R41_REG] = ALL_REGS;
2000      arc_regno_reg_class[R42_REG] = ALL_REGS;
2001      arc_regno_reg_class[R43_REG] = ALL_REGS;
2002
2003      fixed_regs[R40_REG] = 1;
2004      fixed_regs[R41_REG] = 1;
2005      fixed_regs[R42_REG] = 1;
2006      fixed_regs[R43_REG] = 1;
2007
2008      arc_hard_regno_modes[R40_REG] = 0;
2009      arc_hard_regno_modes[R42_REG] = 0;
2010    }
2011
2012  if (TARGET_SIMD_SET)
2013    {
2014      gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
2015      gcc_assert (ARC_LAST_SIMD_VR_REG  == 127);
2016
2017      for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
2018	arc_regno_reg_class [i] =  SIMD_VR_REGS;
2019
2020      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
2021      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
2022      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
2023      gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG  == 143);
2024
2025      for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
2026	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
2027	arc_regno_reg_class [i] =  SIMD_DMA_CONFIG_REGS;
2028    }
2029
2030  /* pc : r63 */
2031  arc_regno_reg_class[PCL_REG] = NO_REGS;
2032
2033  /* ARCv2 accumulator.  */
2034  if ((TARGET_V2
2035       && (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED))
2036      || TARGET_PLUS_DMPY)
2037  {
2038    arc_regno_reg_class[ACCL_REGNO] = GENERAL_REGS;
2039    arc_regno_reg_class[ACCH_REGNO] = GENERAL_REGS;
2040
2041    /* Allow the compiler to freely use them.  */
2042    if (!TEST_HARD_REG_BIT (overrideregs, ACCL_REGNO))
2043      fixed_regs[ACCL_REGNO] = 0;
2044    if (!TEST_HARD_REG_BIT (overrideregs, ACCH_REGNO))
2045      fixed_regs[ACCH_REGNO] = 0;
2046
2047    if (!fixed_regs[ACCH_REGNO] && !fixed_regs[ACCL_REGNO])
2048      arc_hard_regno_modes[ACC_REG_FIRST] = D_MODES;
2049  }
2050}
2051
2052/* Implement TARGET_HARD_REGNO_NREGS.  */
2053
2054static unsigned int
2055arc_hard_regno_nregs (unsigned int regno, machine_mode mode)
2056{
2057  if (GET_MODE_SIZE (mode) == 16
2058      && regno >= ARC_FIRST_SIMD_VR_REG
2059      && regno <= ARC_LAST_SIMD_VR_REG)
2060    return 1;
2061
2062  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
2063}
2064
2065/* Implement TARGET_HARD_REGNO_MODE_OK.  */
2066
2067static bool
2068arc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2069{
2070  return (arc_hard_regno_modes[regno] & arc_mode_class[mode]) != 0;
2071}
2072
2073/* Implement TARGET_MODES_TIEABLE_P.  Tie QI/HI/SI modes together.  */
2074
2075static bool
2076arc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2077{
2078  return (GET_MODE_CLASS (mode1) == MODE_INT
2079	  && GET_MODE_CLASS (mode2) == MODE_INT
2080	  && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD
2081	  && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD);
2082}
2083
2084/* Handle an "interrupt" attribute; arguments as in
2085   struct attribute_spec.handler.  */
2086
2087static tree
2088arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
2089				bool *no_add_attrs)
2090{
2091  gcc_assert (args);
2092
2093  tree value = TREE_VALUE (args);
2094
2095  if (TREE_CODE (value) != STRING_CST)
2096    {
2097      warning (OPT_Wattributes,
2098	       "argument of %qE attribute is not a string constant",
2099	       name);
2100      *no_add_attrs = true;
2101    }
2102  else if (!TARGET_V2
2103	   && strcmp (TREE_STRING_POINTER (value), "ilink1")
2104	   && strcmp (TREE_STRING_POINTER (value), "ilink2"))
2105    {
2106      warning (OPT_Wattributes,
2107	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
2108	       name);
2109      *no_add_attrs = true;
2110    }
2111  else if (TARGET_V2
2112	   && strcmp (TREE_STRING_POINTER (value), "ilink")
2113	   && strcmp (TREE_STRING_POINTER (value), "firq"))
2114    {
2115      warning (OPT_Wattributes,
2116	       "argument of %qE attribute is not \"ilink\" or \"firq\"",
2117	       name);
2118      *no_add_attrs = true;
2119    }
2120
2121  return NULL_TREE;
2122}
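
/* Usage sketch: a (hypothetical) handler is declared as
     void __attribute__ ((interrupt ("ilink"))) my_isr (void);
   on ARCv2 targets ("firq" selects the fast-interrupt variant), while
   ARCv1 code uses "ilink1" or "ilink2"; any other string is rejected by
   the checks above.  */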
2123
2124static tree
2125arc_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2126			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2127{
2128  if (TREE_CODE (*node) != FUNCTION_DECL)
2129    {
2130      warning (OPT_Wattributes, "%qE attribute only applies to functions",
2131	       name);
2132      *no_add_attrs = true;
2133    }
2134
2135  return NULL_TREE;
2136}
2137
2138/* Return the type of the function FUN.
2139
2140   The result is cached in FUN->machine->fn_type, so the computation is
2141   only done once per function.  */
2142
2143static unsigned int
2144arc_compute_function_type (struct function *fun)
2145{
2146  tree attr, decl = fun->decl;
2147  unsigned int fn_type = fun->machine->fn_type;
2148
2149  if (fn_type != ARC_FUNCTION_UNKNOWN)
2150    return fn_type;
2151
2152  /* Check if it is a naked function.  */
2153  if (lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) != NULL_TREE)
2154    fn_type |= ARC_FUNCTION_NAKED;
2155  else
2156    fn_type |= ARC_FUNCTION_NORMAL;
2157
2158  /* Now see if this is an interrupt handler.  */
2159  attr = lookup_attribute ("interrupt", DECL_ATTRIBUTES (decl));
2160  if (attr != NULL_TREE)
2161    {
2162      tree value, args = TREE_VALUE (attr);
2163
2164      gcc_assert (list_length (args) == 1);
2165      value = TREE_VALUE (args);
2166      gcc_assert (TREE_CODE (value) == STRING_CST);
2167
2168      if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
2169	  || !strcmp (TREE_STRING_POINTER (value), "ilink"))
2170	fn_type |= ARC_FUNCTION_ILINK1;
2171      else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
2172	fn_type |= ARC_FUNCTION_ILINK2;
2173      else if (!strcmp (TREE_STRING_POINTER (value), "firq"))
2174	fn_type |= ARC_FUNCTION_FIRQ;
2175      else
2176	gcc_unreachable ();
2177    }
2178
2179  return fun->machine->fn_type = fn_type;
2180}
2181
2182/* Implement `TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS' */
2183
2184static bool
2185arc_allocate_stack_slots_for_args (void)
2186{
2187  /* Naked functions should not allocate stack slots for arguments.  */
2188  unsigned int fn_type = arc_compute_function_type (cfun);
2189
2190  return !ARC_NAKED_P(fn_type);
2191}
2192
2193/* Implement `TARGET_WARN_FUNC_RETURN'.  */
2194
2195static bool
2196arc_warn_func_return (tree decl)
2197{
2198  struct function *func = DECL_STRUCT_FUNCTION (decl);
2199  unsigned int fn_type = arc_compute_function_type (func);
2200
2201  return !ARC_NAKED_P (fn_type);
2202}
2203
2204/* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
2205   and two if they are nearly compatible (which causes a warning to be
2206   generated).  */
2207
2208static int
2209arc_comp_type_attributes (const_tree type1,
2210			  const_tree type2)
2211{
2212  int l1, l2, m1, m2, s1, s2;
2213
2214  /* Check for mismatch of non-default calling convention.  */
2215  if (TREE_CODE (type1) != FUNCTION_TYPE)
2216    return 1;
2217
2218  /* Check for mismatched call attributes.  */
2219  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2220  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2221  m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
2222  m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
2223  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2224  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2225
2226  /* Only bother to check if an attribute is defined.  */
2227  if (l1 | l2 | m1 | m2 | s1 | s2)
2228    {
2229      /* If one type has an attribute, the other must have the same attribute.  */
2230      if ((l1 != l2) || (m1 != m2) || (s1 != s2))
2231	return 0;
2232
2233      /* Disallow mixed attributes.  */
2234      if (l1 + m1 + s1 > 1)
2235	return 0;
2236    }
2237
2238
2239  return 1;
2240}
2241
2242/* Misc. utilities.  */
2243
2244/* COMPARISON is an rtx (CODE X Y) giving the operands to compare.  Emit the
2245   compare insn and return the rtx for the cc reg in the proper mode.  */
2246
2247rtx
2248gen_compare_reg (rtx comparison, machine_mode omode)
2249{
2250  enum rtx_code code = GET_CODE (comparison);
2251  rtx x = XEXP (comparison, 0);
2252  rtx y = XEXP (comparison, 1);
2253  rtx tmp, cc_reg;
2254  machine_mode mode, cmode;
2255
2256
2257  cmode = GET_MODE (x);
2258  if (cmode == VOIDmode)
2259    cmode = GET_MODE (y);
2260  gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
2261  if (cmode == SImode)
2262    {
2263      if (!register_operand (x, SImode))
2264	{
2265	  if (register_operand (y, SImode))
2266	    {
2267	      tmp = x;
2268	      x = y;
2269	      y = tmp;
2270	      code = swap_condition (code);
2271	    }
2272	  else
2273	    x = copy_to_mode_reg (SImode, x);
2274	}
2275      if (GET_CODE (y) == SYMBOL_REF && flag_pic)
2276	y = copy_to_mode_reg (SImode, y);
2277    }
2278  else
2279    {
2280      x = force_reg (cmode, x);
2281      y = force_reg (cmode, y);
2282    }
2283  mode = SELECT_CC_MODE (code, x, y);
2284
2285  cc_reg = gen_rtx_REG (mode, CC_REG);
2286
2287  /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
2288     cmpdfpx_raw, is not a correct comparison for floats:
2289        http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
2290   */
2291  if (TARGET_ARGONAUT_SET
2292      && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
2293    {
2294      switch (code)
2295	{
2296	case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
2297	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
2298	  break;
2299	case GT: case UNLE: case GE: case UNLT:
2300	  code = swap_condition (code);
2301	  tmp = x;
2302	  x = y;
2303	  y = tmp;
2304	  break;
2305	default:
2306	  gcc_unreachable ();
2307	}
2308      if (cmode == SFmode)
2309      {
2310	emit_insn (gen_cmpsfpx_raw (x, y));
2311      }
2312      else /* DFmode */
2313      {
2314	/* The insns accept Dx regs directly.  */
2315	emit_insn (gen_cmpdfpx_raw (x, y));
2316      }
2317
2318      if (mode != CC_FPXmode)
2319	emit_insn (gen_rtx_SET (cc_reg,
2320				gen_rtx_COMPARE (mode,
2321						 gen_rtx_REG (CC_FPXmode, 61),
2322						 const0_rtx)));
2323    }
2324  else if (TARGET_FPX_QUARK && (cmode == SFmode))
2325    {
2326      switch (code)
2327	{
2328	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
2329	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
2330	  break;
2331	case LT: case UNGE: case LE: case UNGT:
2332	  code = swap_condition (code);
2333	  tmp = x;
2334	  x = y;
2335	  y = tmp;
2336	  break;
2337	default:
2338	  gcc_unreachable ();
2339	}
2340
2341      emit_insn (gen_cmp_quark (cc_reg,
2342				gen_rtx_COMPARE (mode, x, y)));
2343    }
2344  else if (TARGET_HARD_FLOAT
2345	   && ((cmode == SFmode && TARGET_FP_SP_BASE)
2346	       || (cmode == DFmode && TARGET_FP_DP_BASE)))
2347    emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2348  else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
2349    {
2350      rtx op0 = gen_rtx_REG (cmode, 0);
2351      rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
2352      bool swap = false;
2353
2354      switch (code)
2355	{
2356	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
2357	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
2358	  break;
2359	case LT: case UNGE: case LE: case UNGT:
2360	  code = swap_condition (code);
2361	  swap = true;
2362	  break;
2363	default:
2364	  gcc_unreachable ();
2365	}
2366      if (currently_expanding_to_rtl)
2367	{
2368	  if (swap)
2369	    {
2370	      tmp = x;
2371	      x = y;
2372	      y = tmp;
2373	    }
2374	  emit_move_insn (op0, x);
2375	  emit_move_insn (op1, y);
2376	}
2377      else
2378	{
2379	  gcc_assert (rtx_equal_p (op0, x));
2380	  gcc_assert (rtx_equal_p (op1, y));
2381	  if (swap)
2382	    {
2383	      op0 = y;
2384	      op1 = x;
2385	    }
2386	}
2387      emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
2388    }
2389  else
2390    emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2391  return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
2392}
2393
2394/* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
2395   We assume the value can be either signed or unsigned.  */
2396
2397bool
2398arc_double_limm_p (rtx value)
2399{
2400  HOST_WIDE_INT low, high;
2401
2402  gcc_assert (GET_CODE (value) == CONST_DOUBLE);
2403
2404  if (TARGET_DPFP)
2405    return true;
2406
2407  low = CONST_DOUBLE_LOW (value);
2408  high = CONST_DOUBLE_HIGH (value);
2409
2410  if (low & 0x80000000)
2411    {
2412      return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
2413	      || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
2414		   == - (unsigned HOST_WIDE_INT) 0x80000000)
2415		  && high == -1));
2416    }
2417  else
2418    {
2419      return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
2420    }
2421}
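
/* Worked example for the check above (without -mdpfp): any value
   representable as a 32-bit signed or unsigned number fits, so
   0xffffffff and -0x80000000 are both acceptable as limms, while
   0x100000000 needs more than 32 significant bits and is rejected.  */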
2422
2423/* Do any needed setup for a variadic function.  For the ARC, we must
2424   create a register parameter block, and then copy any anonymous arguments
2425   in registers to memory.
2426
2427   CUM has not been updated for the last named argument (which is given
2428   by ARG), and we rely on this fact.  */
2429
2430static void
2431arc_setup_incoming_varargs (cumulative_args_t args_so_far,
2432			    const function_arg_info &arg,
2433			    int *pretend_size, int no_rtl)
2434{
2435  int first_anon_arg;
2436  CUMULATIVE_ARGS next_cum;
2437
2438  /* We must treat `__builtin_va_alist' as an anonymous arg.  */
2439
2440  next_cum = *get_cumulative_args (args_so_far);
2441  arc_function_arg_advance (pack_cumulative_args (&next_cum), arg);
2442  first_anon_arg = next_cum;
2443
2444  if (FUNCTION_ARG_REGNO_P (first_anon_arg))
2445    {
2446      /* First anonymous (unnamed) argument is in a reg.  */
2447
2448      /* Note that first_reg_offset < MAX_ARC_PARM_REGS.  */
2449      int first_reg_offset = first_anon_arg;
2450
2451      if (!no_rtl)
2452	{
2453	  rtx regblock
2454	    = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
2455			   FIRST_PARM_OFFSET (0)));
2456	  move_block_from_reg (first_reg_offset, regblock,
2457			       MAX_ARC_PARM_REGS - first_reg_offset);
2458	}
2459
2460      *pretend_size
2461	= ((MAX_ARC_PARM_REGS - first_reg_offset) * UNITS_PER_WORD);
2462    }
2463}
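
/* Rough example, assuming the usual r0-r7 argument registers
   (MAX_ARC_PARM_REGS == 8): for "int f (int a, int b, ...)" the first
   anonymous argument would start in r2, so r2..r7 are spilled to the
   pretend-args area and *pretend_size becomes (8 - 2) * 4 = 24 bytes.  */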
2464
2465/* Cost functions.  */
2466
2467/* Provide the costs of an addressing mode that contains ADDR.
2468   If ADDR is not a valid address, its cost is irrelevant.  */
2469
2470static int
2471arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
2472{
2473  switch (GET_CODE (addr))
2474    {
2475    case REG :
2476      return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
2477    case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
2478    case PRE_MODIFY: case POST_MODIFY:
2479      return !speed;
2480
2481    case LABEL_REF :
2482    case SYMBOL_REF :
2483    case CONST :
2484      if (TARGET_NPS_CMEM && cmem_address (addr, SImode))
2485	return 0;
2486      /* Most likely needs a LIMM.  */
2487      return COSTS_N_INSNS (1);
2488
2489    case PLUS :
2490      {
2491	rtx plus0 = XEXP (addr, 0);
2492	rtx plus1 = XEXP (addr, 1);
2493
2494	if (GET_CODE (plus0) != REG
2495	    && (GET_CODE (plus0) != MULT
2496		|| !CONST_INT_P (XEXP (plus0, 1))
2497		|| (INTVAL (XEXP (plus0, 1)) != 2
2498		    && INTVAL (XEXP (plus0, 1)) != 4)))
2499	  break;
2500
2501	switch (GET_CODE (plus1))
2502	  {
2503	  case CONST_INT :
2504	    return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
2505		    ? COSTS_N_INSNS (1)
2506		    : speed
2507		    ? 0
2508		    : (satisfies_constraint_Rcq (plus0)
2509		       && satisfies_constraint_O (plus1))
2510		    ? 0
2511		    : 1);
2512	  case REG:
2513	    return (speed < 1 ? 0
2514		    : (satisfies_constraint_Rcq (plus0)
2515		       && satisfies_constraint_Rcq (plus1))
2516		    ? 0 : 1);
2517	  case CONST :
2518	  case SYMBOL_REF :
2519	  case LABEL_REF :
2520	    return COSTS_N_INSNS (1);
2521	  default:
2522	    break;
2523	  }
2524	break;
2525      }
2526    default:
2527      break;
2528    }
2529
2530  return 4;
2531}
2532
2533/* Emit instruction X with the frame related bit set.  */
2534
2535static rtx
2536frame_insn (rtx x)
2537{
2538  x = emit_insn (x);
2539  RTX_FRAME_RELATED_P (x) = 1;
2540  return x;
2541}
2542
2543/* Emit a frame insn to move SRC to DST.  */
2544
2545static rtx
2546frame_move (rtx dst, rtx src)
2547{
2548  rtx tmp = gen_rtx_SET (dst, src);
2549  RTX_FRAME_RELATED_P (tmp) = 1;
2550  return frame_insn (tmp);
2551}
2552
2553/* Like frame_move, but add a REG_INC note for REG if ADDR contains an
2554   auto-increment address, or if ADDR is NULL.  */
2555
2556static rtx
2557frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
2558{
2559  rtx insn = frame_move (dst, src);
2560
2561  if (!addr
2562      || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
2563      || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
2564    add_reg_note (insn, REG_INC, reg);
2565  return insn;
2566}
2567
2568/* Emit a frame insn which adjusts a frame address register REG by OFFSET.  */
2569
2570static rtx
2571frame_add (rtx reg, HOST_WIDE_INT offset)
2572{
2573  gcc_assert ((offset & 0x3) == 0);
2574  if (!offset)
2575    return NULL_RTX;
2576  return frame_move (reg, plus_constant (Pmode, reg, offset));
2577}
2578
2579/* Emit a frame insn which adjusts stack pointer by OFFSET.  */
2580
2581static rtx
2582frame_stack_add (HOST_WIDE_INT offset)
2583{
2584  return frame_add (stack_pointer_rtx, offset);
2585}
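
/* Minimal usage sketch for the helpers above: a prologue needing just
   16 bytes of locals could call frame_stack_add (-16) and the matching
   epilogue frame_stack_add (16); both go through frame_move/frame_insn,
   so the emitted sp adjustments are marked RTX_FRAME_RELATED_P and the
   unwind info is derived from them automatically.  */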
2586
2587/* Helper function to wrap FRAME_POINTER_NEEDED.  We do this as
2588   FRAME_POINTER_NEEDED will not be true until the IRA (Integrated
2589   Register Allocator) pass, while we want to get the frame size
2590   correct earlier than the IRA pass.
2591
2592   When a function uses eh_return we must ensure that the fp register
2593   is saved and then restored so that the unwinder can restore the
2594   correct value for the frame we are going to jump to.
2595
2596   To do this we force all frames that call eh_return to require a
2597   frame pointer (see arc_frame_pointer_required), this
2598   will ensure that the previous frame pointer is stored on entry to
2599   the function, and will then be reloaded at function exit.
2600
2601   As the frame pointer is handled as a special case in our prologue
2602   and epilogue code it must not be saved and restored using the
2603   MUST_SAVE_REGISTER mechanism otherwise we run into issues where GCC
2604   believes that the function is not using a frame pointer and that
2605   the value in the fp register is the frame pointer, while the
2606   prologue and epilogue are busy saving and restoring the fp
2607   register.
2608
2609   During compilation of a function the frame size is evaluated
2610   multiple times; it is not until the reload pass is complete that the
2611   frame size is considered fixed (it is at this point that space for
2612   all spills has been allocated).  However, the frame_pointer_needed
2613   variable is not set true until the register allocation pass; as a
2614   result, in the early stages the frame size does not include space
2615   for the frame pointer to be spilled.
2616
2617   The problem that this causes is that the rtl generated for
2618   EH_RETURN_HANDLER_RTX uses the details of the frame size to compute
2619   the offset from the frame pointer at which the return address
2620   lives.  However, in early passes GCC has not yet realised we need a
2621   frame pointer, and so has not included space for the frame pointer
2622   in the frame size, and so gets the offset of the return address
2623   wrong.  This should not be an issue as in later passes GCC has
2624   realised that the frame pointer needs to be spilled, and has
2625   increased the frame size.  However, the rtl for the
2626   EH_RETURN_HANDLER_RTX is not regenerated to use the newer, larger
2627   offset, and the wrong smaller offset is used.  */
2628
2629static bool
2630arc_frame_pointer_needed (void)
2631{
2632  return (frame_pointer_needed || crtl->calls_eh_return);
2633}
2634
2635/* Tell prologue and epilogue if register REGNO should be saved /
2636   restored.  SPECIAL_P is true when the register may need a special
2637   ld/st sequence.  The return address and stack pointer are treated
2638   separately.  Don't consider them here.  */
2639
2640static bool
2641arc_must_save_register (int regno, struct function *func, bool special_p)
2642{
2643  unsigned int fn_type = arc_compute_function_type (func);
2644  bool irq_auto_save_p = ((irq_ctrl_saved.irq_save_last_reg >= regno)
2645			  && ARC_AUTO_IRQ_P (fn_type));
2646  bool firq_auto_save_p = ARC_FAST_INTERRUPT_P (fn_type);
2647
2648  switch (rgf_banked_register_count)
2649    {
2650    case 4:
2651      firq_auto_save_p &= (regno < 4);
2652      break;
2653    case 8:
2654      firq_auto_save_p &= ((regno < 4) || ((regno > 11) && (regno < 16)));
2655      break;
2656    case 16:
2657      firq_auto_save_p &= ((regno < 4) || ((regno > 9) && (regno < 16))
2658			   || ((regno > 25) && (regno < 29))
2659			   || ((regno > 29) && (regno < 32)));
2660      break;
2661    case 32:
2662      firq_auto_save_p &= (regno != 29) && (regno < 32);
2663      break;
2664    default:
2665      firq_auto_save_p = false;
2666      break;
2667    }
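
  /* E.g. with 16 banked registers the switch above leaves
     firq_auto_save_p set only for r0-r3, r10-r15, r26-r28 and r30-r31,
     so a fast interrupt handler needs no explicit saves for those.  */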
2668
2669  switch (regno)
2670    {
2671    case ILINK1_REG:
2672    case RETURN_ADDR_REGNUM:
2673    case STACK_POINTER_REGNUM:
2674      /* The stack pointer and the return address are handled
2675	 separately.  */
2676      return false;
2677
2678    case R30_REG:
2679      /* r30 is either used as ilink2 by ARCv1 or as a free register
2680	 by ARCv2.  */
2681      if (!TARGET_V2)
2682	return false;
2683      break;
2684
2685    case R40_REG:
2686    case R41_REG:
2687    case R42_REG:
2688    case R43_REG:
2689    case R44_REG:
2690      /* If these registers are used by the FPX machinery, we handle
2691	 them separately.  */
2692      if (TARGET_DPFP && !special_p)
2693	return false;
2694      /* FALLTHRU.  */
2695
2696    case R32_REG:
2697    case R33_REG:
2698    case R34_REG:
2699    case R35_REG:
2700    case R36_REG:
2701    case R37_REG:
2702    case R38_REG:
2703    case R39_REG:
2704    case R45_REG:
2705    case R46_REG:
2706    case R47_REG:
2707    case R48_REG:
2708    case R49_REG:
2709    case R50_REG:
2710    case R51_REG:
2711    case R52_REG:
2712    case R53_REG:
2713    case R54_REG:
2714    case R55_REG:
2715    case R56_REG:
2716    case R57_REG:
2717      /* The Extension Registers.  */
2718      if (ARC_INTERRUPT_P (fn_type)
2719	  && (df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2720	      || df_regs_ever_live_p (regno))
2721	  /* Not all extension registers are available, choose the
2722	     real ones.  */
2723	  && !fixed_regs[regno])
2724	return true;
2725      return false;
2726
2727    case R58_REG:
2728    case R59_REG:
2729      /* ARC600 defines these as the mlo/mhi registers; otherwise just
2730	 handle them like any other extension register.  */
2731      if (ARC_INTERRUPT_P (fn_type)
2732	  && (df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2733	      || df_regs_ever_live_p (regno))
2734	  /* Not all extension registers are available, choose the
2735	     real ones.  */
2736	  && ((!fixed_regs[regno] && !special_p)
2737	      || (TARGET_MUL64_SET && special_p)))
2738	return true;
2739      return false;
2740
2741    case 61:
2742    case 62:
2743    case 63:
2744      /* Fixed/control register, nothing to do.  LP_COUNT is
2745	 different.  */
2746      return false;
2747
2748    case HARD_FRAME_POINTER_REGNUM:
2749      /* If we need FP reg as a frame pointer then don't save it as a
2750	 regular reg.  */
2751      if (arc_frame_pointer_needed ())
2752	return false;
2753      break;
2754
2755    default:
2756      break;
2757    }
2758
2759  if (((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2760       /* In an interrupt save everything.  */
2761       || (ARC_INTERRUPT_P (fn_type)
2762	   && (df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2763	       || df_regs_ever_live_p (regno))))
2764      /* Do not emit code for auto saved regs.  */
2765      && !irq_auto_save_p
2766      && !firq_auto_save_p)
2767    return true;
2768  return false;
2769}
2770
2771/* Return true if the return address must be saved in the current function,
2772   otherwise return false.  */
2773
2774static bool
2775arc_must_save_return_addr (struct function *func)
2776{
2777  if (func->machine->frame_info.save_return_addr)
2778    return true;
2779
2780  return false;
2781}
2782
2783/* Return non-zero if there are registers to be saved or loaded using
2784   millicode thunks.  We can only use consecutive sequences starting
2785   with r13, and not going beyond r25.
2786   GMASK is a bitmask of registers to save.  This function sets
2787   FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2788   of registers to be saved / restored with a millicode call.  */
2789
2790static int
2791arc_compute_millicode_save_restore_regs (uint64_t gmask,
2792					 struct arc_frame_info *frame)
2793{
2794  int regno;
2795
2796  int start_reg = 13, end_reg = 25;
2797
2798  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
2799    regno++;
2800  end_reg = regno - 1;
2801  /* There is no point in using millicode thunks if we don't save/restore
2802     at least three registers.  For non-leaf functions we also have the
2803     blink restore.  */
2804  if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2805    {
2806      frame->millicode_start_reg = 13;
2807      frame->millicode_end_reg = regno - 1;
2808      return 1;
2809    }
2810  return 0;
2811}
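
/* For example, a leaf function saving only r13-r14 (two registers) does
   not use millicode thunks, whereas a non-leaf function with the same
   mask does, because the blink restore makes the thunk call worthwhile
   with just two registers.  */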
2812
2813/* Return the bytes needed to compute the frame pointer from the
2814   current stack pointer.  */
2815
2816static unsigned int
2817arc_compute_frame_size (void)
2818{
2819  int regno;
2820  unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2821  unsigned int reg_size;
2822  uint64_t gmask;
2823  struct arc_frame_info *frame_info;
2824  int size;
2825  unsigned int extra_plus_reg_size;
2826  unsigned int extra_plus_reg_size_aligned;
2827  unsigned int fn_type = arc_compute_function_type (cfun);
2828
2829  /* The answer might already be known.  */
2830  if (cfun->machine->frame_info.initialized)
2831    return cfun->machine->frame_info.total_size;
2832
2833  frame_info = &cfun->machine->frame_info;
2834  size = ARC_STACK_ALIGN (get_frame_size ());
2835
2836  /* 1) Size of locals and temporaries.  */
2837  var_size	= size;
2838
2839  /* 2) Size of outgoing arguments.  */
2840  args_size	= crtl->outgoing_args_size;
2841
2842  /* 3) Calculate space needed for saved registers.
2843     ??? We ignore the extension registers for now.  */
2844
2845  /* See if this is an interrupt handler.  Call used registers must be saved
2846     for them too.  */
2847
2848  reg_size = 0;
2849  gmask = 0;
2850
2851  /* The last 4 regs are special, avoid them.  */
2852  for (regno = 0; regno <= (GMASK_LEN - 4); regno++)
2853    {
2854      if (arc_must_save_register (regno, cfun, false))
2855	{
2856	  reg_size += UNITS_PER_WORD;
2857	  gmask |= 1ULL << regno;
2858	}
2859    }
2860
2861  /* In a frame that calls __builtin_eh_return two data registers are
2862     used to pass values back to the exception handler.
2863
2864     Ensure that these registers are spilled to the stack so that the
2865     exception throw code can find them, and update the saved values.
2866     The handling code will then consume these reloaded values to
2867     handle the exception.  */
2868  if (crtl->calls_eh_return)
2869    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2870      {
2871	reg_size += UNITS_PER_WORD;
2872	gmask |= 1ULL << regno;
2873      }
2874
2875  /* Check if we need to save the return address.  */
2876  frame_info->save_return_addr = (!crtl->is_leaf
2877				  || df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2878				  || crtl->calls_eh_return);
2879
2880  /* Saving blink reg for millicode thunk calls.  */
2881  if (TARGET_MILLICODE_THUNK_SET
2882      && !ARC_INTERRUPT_P (fn_type)
2883      && !crtl->calls_eh_return)
2884    {
2885      if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2886	frame_info->save_return_addr = true;
2887    }
2888
2889  /* Save lp_count, lp_start and lp_end.  */
2890  if (arc_lpcwidth != 0 && arc_must_save_register (LP_COUNT, cfun, true))
2891    reg_size += UNITS_PER_WORD * 3;
2892
2893  /* Check for the special R40-R44 regs used by FPX extension.  */
2894  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R41_REG : R40_REG,
2895			      cfun, TARGET_DPFP))
2896    reg_size += UNITS_PER_WORD * 2;
2897  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R43_REG : R42_REG,
2898			      cfun, TARGET_DPFP))
2899    reg_size += UNITS_PER_WORD * 2;
2900
2901  /* Check for the special MLO/MHI case used by ARC600's MUL64
2902     extension.  */
2903  if (arc_must_save_register (R58_REG, cfun, TARGET_MUL64_SET))
2904    reg_size += UNITS_PER_WORD * 2;
2905
2906  /* 4) Calculate extra size made up of the blink + fp size.  */
2907  extra_size = 0;
2908  if (arc_must_save_return_addr (cfun))
2909    extra_size = 4;
2910  /* Add FP size only when it is not autosaved.  */
2911  if (arc_frame_pointer_needed ()
2912      && !ARC_AUTOFP_IRQ_P (fn_type))
2913    extra_size += 4;
2914
2915  /* 5) Space for variable arguments passed in registers.  */
2916  pretend_size	= crtl->args.pretend_args_size;
2917
2918  /* Ensure everything before the locals is aligned appropriately.  */
2919  extra_plus_reg_size = extra_size + reg_size;
2920  extra_plus_reg_size_aligned = ARC_STACK_ALIGN (extra_plus_reg_size);
2921  reg_size = extra_plus_reg_size_aligned - extra_size;
2922
2923  /* Compute total frame size.  */
2924  total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2925
2926  /* It used to be the case that the alignment was forced at this
2927     point.  However, that is dangerous; calculations based on
2928     total_size would be wrong.  Given that this has never cropped up
2929     as an issue I've changed this to an assert for now.  */
2930  gcc_assert (total_size == ARC_STACK_ALIGN (total_size));
2931
2932  /* Save computed information.  */
2933  frame_info->total_size   = total_size;
2934  frame_info->extra_size   = extra_size;
2935  frame_info->pretend_size = pretend_size;
2936  frame_info->var_size     = var_size;
2937  frame_info->args_size    = args_size;
2938  frame_info->reg_size     = reg_size;
2939  frame_info->gmask        = gmask;
2940  frame_info->initialized  = reload_completed;
2941
2942  /* Ok, we're done.  */
2943  return total_size;
2944}
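
/* Worked example (assuming the usual 4-byte stack alignment): a non-leaf
   function with 8 bytes of locals that must save r13, r14 and blink, and
   needs no frame pointer, gets var_size = 8, reg_size = 8, extra_size = 4
   and no pretend or outgoing args, for a total_size of 20 bytes.  */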
2945
2946/* Build dwarf information when the context is saved via AUX_IRQ_CTRL
2947   mechanism.  */
2948
2949static void
2950arc_dwarf_emit_irq_save_regs (void)
2951{
2952  rtx tmp, par, insn, reg;
2953  int i, offset, j;
2954
2955  par = gen_rtx_SEQUENCE (VOIDmode,
2956			  rtvec_alloc (irq_ctrl_saved.irq_save_last_reg + 1
2957				       + irq_ctrl_saved.irq_save_blink
2958				       + irq_ctrl_saved.irq_save_lpcount
2959				       + 1));
2960
2961  /* Build the stack adjustment note for unwind info.  */
2962  j = 0;
2963  offset = UNITS_PER_WORD * (irq_ctrl_saved.irq_save_last_reg + 1
2964			     + irq_ctrl_saved.irq_save_blink
2965			     + irq_ctrl_saved.irq_save_lpcount);
2966  tmp = plus_constant (Pmode, stack_pointer_rtx, -1 * offset);
2967  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
2968  RTX_FRAME_RELATED_P (tmp) = 1;
2969  XVECEXP (par, 0, j++) = tmp;
2970
2971  offset -= UNITS_PER_WORD;
2972
2973  /* 1st goes LP_COUNT.  */
2974  if (irq_ctrl_saved.irq_save_lpcount)
2975    {
2976      reg = gen_rtx_REG (SImode, 60);
2977      tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
2978      tmp = gen_frame_mem (SImode, tmp);
2979      tmp = gen_rtx_SET (tmp, reg);
2980      RTX_FRAME_RELATED_P (tmp) = 1;
2981      XVECEXP (par, 0, j++) = tmp;
2982      offset -= UNITS_PER_WORD;
2983    }
2984
2985  /* 2nd goes BLINK.  */
2986  if (irq_ctrl_saved.irq_save_blink)
2987    {
2988      reg = gen_rtx_REG (SImode, 31);
2989      tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
2990      tmp = gen_frame_mem (SImode, tmp);
2991      tmp = gen_rtx_SET (tmp, reg);
2992      RTX_FRAME_RELATED_P (tmp) = 1;
2993      XVECEXP (par, 0, j++) = tmp;
2994      offset -= UNITS_PER_WORD;
2995    }
2996
2997  /* Build the parallel of the remaining registers recorded as saved
2998     for unwind.  */
2999  for (i = irq_ctrl_saved.irq_save_last_reg; i >= 0; i--)
3000    {
3001      reg = gen_rtx_REG (SImode, i);
3002      tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
3003      tmp = gen_frame_mem (SImode, tmp);
3004      tmp = gen_rtx_SET (tmp, reg);
3005      RTX_FRAME_RELATED_P (tmp) = 1;
3006      XVECEXP (par, 0, j++) = tmp;
3007      offset -= UNITS_PER_WORD;
3008    }
3009
3010  /* Dummy insn used to anchor the dwarf info.  */
3011  insn = emit_insn (gen_stack_irq_dwarf());
3012  add_reg_note (insn, REG_FRAME_RELATED_EXPR, par);
3013  RTX_FRAME_RELATED_P (insn) = 1;
3014}
3015
3016/* Helper for prologue: emit frame store with pre_modify or pre_dec to
3017   save register REG on stack.  An initial offset OFFSET can be passed
3018   to the function.  */
3019
3020static int
3021frame_save_reg (rtx reg, HOST_WIDE_INT offset)
3022{
3023  rtx addr;
3024
3025  if (offset)
3026    {
3027      rtx tmp = plus_constant (Pmode, stack_pointer_rtx,
3028			       offset - GET_MODE_SIZE (GET_MODE (reg)));
3029      addr = gen_frame_mem (GET_MODE (reg),
3030			    gen_rtx_PRE_MODIFY (Pmode,
3031						stack_pointer_rtx,
3032						tmp));
3033    }
3034  else
3035    addr = gen_frame_mem (GET_MODE (reg), gen_rtx_PRE_DEC (Pmode,
3036							   stack_pointer_rtx));
3037  frame_move_inc (addr, reg, stack_pointer_rtx, 0);
3038
3039  return GET_MODE_SIZE (GET_MODE (reg)) - offset;
3040}
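
/* For instance, saving blink (4 bytes) with OFFSET == -12 emits a store
   through (pre_modify sp (plus sp -16)), i.e. roughly "st.a blink,[sp,-16]"
   with address write-back, and the function reports 16 bytes of frame as
   allocated (4 - (-12)).  */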
3041
3042/* Helper used when saving AUX regs during ISR.  */
3043
3044static int
3045push_reg (rtx reg)
3046{
3047  rtx stkslot = gen_rtx_MEM (GET_MODE (reg), gen_rtx_PRE_DEC (Pmode,
3048						   stack_pointer_rtx));
3049  rtx insn = emit_move_insn (stkslot, reg);
3050  RTX_FRAME_RELATED_P (insn) = 1;
3051  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3052		gen_rtx_SET (stack_pointer_rtx,
3053			     plus_constant (Pmode, stack_pointer_rtx,
3054					    -GET_MODE_SIZE (GET_MODE (reg)))));
3055  return GET_MODE_SIZE (GET_MODE (reg));
3056}
3057
3058/* Helper for epilogue: emit frame load with post_modify or post_inc
3059   to restore register REG from stack.  The initial offset is passed
3060   via OFFSET.  */
3061
3062static int
3063frame_restore_reg (rtx reg, HOST_WIDE_INT offset)
3064{
3065  rtx addr, insn;
3066
3067  if (offset)
3068    {
3069      rtx tmp = plus_constant (Pmode, stack_pointer_rtx,
3070			       offset + GET_MODE_SIZE (GET_MODE (reg)));
3071      addr = gen_frame_mem (GET_MODE (reg),
3072			    gen_rtx_POST_MODIFY (Pmode,
3073						 stack_pointer_rtx,
3074						 tmp));
3075    }
3076  else
3077    addr = gen_frame_mem (GET_MODE (reg), gen_rtx_POST_INC (Pmode,
3078							    stack_pointer_rtx));
3079  insn = frame_move_inc (reg, addr, stack_pointer_rtx, 0);
3080  add_reg_note (insn, REG_CFA_RESTORE, reg);
3081
3082  if (reg == hard_frame_pointer_rtx)
3083    add_reg_note (insn, REG_CFA_DEF_CFA,
3084		  plus_constant (Pmode, stack_pointer_rtx,
3085				 GET_MODE_SIZE (GET_MODE (reg)) + offset));
3086  else
3087    add_reg_note (insn, REG_CFA_ADJUST_CFA,
3088		  gen_rtx_SET (stack_pointer_rtx,
3089			       plus_constant (Pmode, stack_pointer_rtx,
3090					      GET_MODE_SIZE (GET_MODE (reg))
3091					      + offset)));
3092
3093  return GET_MODE_SIZE (GET_MODE (reg)) + offset;
3094}
3095
3096/* Helper used when restoring AUX regs during ISR.  */
3097
3098static int
3099pop_reg (rtx reg)
3100{
3101  rtx stkslot = gen_rtx_MEM (GET_MODE (reg), gen_rtx_POST_INC (Pmode,
3102						   stack_pointer_rtx));
3103  rtx insn = emit_move_insn (reg, stkslot);
3104  RTX_FRAME_RELATED_P (insn) = 1;
3105  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3106		gen_rtx_SET (stack_pointer_rtx,
3107			     plus_constant (Pmode, stack_pointer_rtx,
3108					    GET_MODE_SIZE (GET_MODE (reg)))));
3109  return GET_MODE_SIZE (GET_MODE (reg));
3110}
3111
3112/* Check if we have a contiguous range of registers to be saved/restored
3113   with the help of the enter/leave instructions.  A valid register range
3114   starts at $r13 and goes up to and including $r26.  */
3115
3116static bool
3117arc_enter_leave_p (uint64_t gmask)
3118{
3119  int regno;
3120  unsigned int rmask = 0;
3121
3122  if (!gmask)
3123    return false;
3124
3125  for (regno = ENTER_LEAVE_START_REG;
3126       regno <= ENTER_LEAVE_END_REG && (gmask & (1ULL << regno)); regno++)
3127    rmask |= 1ULL << regno;
3128
3129  if (rmask ^ gmask)
3130    return false;
3131
3132  return true;
3133}
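
/* For example, a GMASK covering r13, r14 and r15 is a contiguous run
   starting at r13 and returns true, while a mask containing r13 and r15
   but not r14 leaves bits outside the run and returns false.  */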
3134
3135/* ARC's prologue, save any needed call-saved regs (and call-used if
3136   this is an interrupt handler) for ARCompact ISA, using ST/STD
3137   instructions.  */
3138
3139static int
3140arc_save_callee_saves (uint64_t gmask,
3141		       bool save_blink,
3142		       bool save_fp,
3143		       HOST_WIDE_INT offset,
3144		       bool emit_move)
3145{
3146  rtx reg;
3147  int frame_allocated = 0;
3148  int i;
3149
3150  /* The home-grown ABI says link register is saved first.  */
3151  if (save_blink)
3152    {
3153      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3154      frame_allocated += frame_save_reg (reg, offset);
3155      offset = 0;
3156    }
3157
3158  /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
3159  if (gmask)
3160    for (i = GMASK_LEN; i >= 0; i--)
3161      {
3162	machine_mode save_mode = SImode;
3163
3164	if (TARGET_LL64
3165	    && ((i - 1) % 2 == 0)
3166	    && ((gmask & (1ULL << i)) != 0)
3167	    && ((gmask & (1ULL << (i - 1))) != 0))
3168	  {
3169	    save_mode = DImode;
3170	    --i;
3171	  }
3172	else if ((gmask & (1ULL << i)) == 0)
3173	  continue;
3174
3175	reg = gen_rtx_REG (save_mode, i);
3176	frame_allocated += frame_save_reg (reg, offset);
3177	offset = 0;
3178      }
3179
3180  /* Save frame pointer if needed.  First save the FP on stack, if not
3181     autosaved.  Unfortunately, I cannot add it to gmask and use the
3182     above loop to save fp because our ABI states fp goes after all
3183     registers are saved.  */
3184  if (save_fp)
3185    {
3186      frame_allocated += frame_save_reg (hard_frame_pointer_rtx, offset);
3187      offset = 0;
3188    }
3189
3190  /* Emit mov fp,sp.  */
3191  if (emit_move)
3192    frame_move (hard_frame_pointer_rtx, stack_pointer_rtx);
3193
3194  return frame_allocated;
3195}
3196
3197/* ARC's epilogue, restore any required call-saved regs (and call-used
3198   if it is for an interrupt handler) using LD/LDD instructions.  */
3199
3200static int
3201arc_restore_callee_saves (uint64_t gmask,
3202			  bool restore_blink,
3203			  bool restore_fp,
3204			  HOST_WIDE_INT offset,
3205			  HOST_WIDE_INT allocated)
3206{
3207  rtx reg;
3208  int frame_deallocated = 0;
3209  HOST_WIDE_INT offs = cfun->machine->frame_info.reg_size;
3210  unsigned int fn_type = arc_compute_function_type (cfun);
3211  bool early_blink_restore;
3212  int i;
3213
3214  /* Emit mov sp,fp.  */
3215  if (arc_frame_pointer_needed () && offset)
3216    {
3217      frame_move (stack_pointer_rtx, hard_frame_pointer_rtx);
3218      frame_deallocated += offset;
3219      offset = 0;
3220    }
3221
3222  if (restore_fp)
3223    {
3224      /* Any offset is taken care of by the previous if-statement.  */
3225      gcc_assert (offset == 0);
3226      frame_deallocated += frame_restore_reg (hard_frame_pointer_rtx, 0);
3227    }
3228
3229  if (offset)
3230    {
3231      /* No $fp involved, we need to do an add to set the $sp to the
3232	 location of the first register.  */
3233      frame_stack_add (offset);
3234      frame_deallocated += offset;
3235      offset = 0;
3236    }
3237
3238  /* When we are not optimizing for size, have registers to restore, and
3239     are not in an interrupt handler, restore blink first.  */
3240  early_blink_restore = restore_blink && !optimize_size && offs
3241    && !ARC_INTERRUPT_P (fn_type);
3242  if (early_blink_restore)
3243    {
3244      rtx addr = plus_constant (Pmode, stack_pointer_rtx, offs);
3245      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3246      rtx insn = frame_move_inc (reg, gen_frame_mem (Pmode, addr),
3247				 stack_pointer_rtx, NULL_RTX);
3248      add_reg_note (insn, REG_CFA_RESTORE, reg);
3249      restore_blink = false;
3250    }
3251
3252  /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
3253  if (gmask)
3254    for (i = 0; i <= GMASK_LEN; i++)
3255      {
3256	machine_mode restore_mode = SImode;
3257
3258	if (TARGET_LL64
3259	    && ((i % 2) == 0)
3260	    && ((gmask & (1ULL << i)) != 0)
3261	    && ((gmask & (1ULL << (i + 1))) != 0))
3262	  restore_mode = DImode;
3263	else if ((gmask & (1ULL << i)) == 0)
3264	  continue;
3265
3266	reg = gen_rtx_REG (restore_mode, i);
3267	offs = 0;
3268	switch (restore_mode)
3269	  {
3270	  case E_DImode:
3271	    if ((GMASK_LEN - __builtin_clzll (gmask)) == (i + 1)
3272		&& early_blink_restore)
3273	      offs = 4;
3274	    break;
3275	  case E_SImode:
3276	    if ((GMASK_LEN - __builtin_clzll (gmask)) == i
3277		&& early_blink_restore)
3278	      offs = 4;
3279	    break;
3280	  default:
3281	    offs = 0;
3282	  }
3283	frame_deallocated += frame_restore_reg (reg, offs);
3284	offset = 0;
3285
3286	if (restore_mode == DImode)
3287	  i++;
3288      }
3289
3290  if (restore_blink)
3291    {
3292      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3293      frame_deallocated += frame_restore_reg (reg, allocated
3294					      - frame_deallocated
3295					      /* Consider as well the
3296						 current restored
3297						 register size.  */
3298					      - UNITS_PER_WORD);
3299    }
3300
3301  return frame_deallocated;
3302}
3303
3304/* ARC prologue, save the registers using the enter instruction.  The
3305   enter instruction can also save $blink (SAVE_BLINK) and the $fp
3306   (SAVE_FP) register.  */
3307
3308static int
3309arc_save_callee_enter (uint64_t gmask,
3310		       bool save_blink,
3311		       bool save_fp,
3312		       HOST_WIDE_INT offset)
3313{
3314  int start_reg = ENTER_LEAVE_START_REG;
3315  int end_reg = ENTER_LEAVE_END_REG;
3316  int regno, indx, off, nregs;
3317  rtx insn, reg, mem;
3318  int frame_allocated = 0;
3319
3320  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3321    regno++;
3322
3323  end_reg = regno - 1;
3324  nregs = end_reg - start_reg + 1;
3325  nregs += save_blink ? 1 : 0;
3326  nregs += save_fp ? 1 : 0;
3327
3328  if (offset)
3329    frame_stack_add (offset);
3330
3331  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + (save_fp ? 1 : 0)
3332						  + 1));
3333  indx = 0;
3334
3335  reg = gen_rtx_SET (stack_pointer_rtx,
3336		     plus_constant (Pmode,
3337				    stack_pointer_rtx,
3338				    -nregs * UNITS_PER_WORD));
3339  RTX_FRAME_RELATED_P (reg) = 1;
3340  XVECEXP (insn, 0, indx++) = reg;
3341  off = nregs * UNITS_PER_WORD;
3342
3343  if (save_blink)
3344    {
3345      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3346      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3347						 stack_pointer_rtx,
3348						 off));
3349      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
3350      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3351      off -= UNITS_PER_WORD;
3352      save_blink = false;
3353    }
3354
3355  for (regno = start_reg;
3356       regno <= end_reg;
3357       regno++, indx++, off -= UNITS_PER_WORD)
3358    {
3359      reg = gen_rtx_REG (SImode, regno);
3360      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3361						  stack_pointer_rtx,
3362						  off));
3363      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
3364      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3365      gmask = gmask & ~(1ULL << regno);
3366    }
3367
3368  if (save_fp)
3369    {
3370      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3371						 stack_pointer_rtx,
3372						 off));
3373      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, hard_frame_pointer_rtx);
3374      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3375      off -= UNITS_PER_WORD;
3376
3377      XVECEXP (insn, 0, indx) = gen_rtx_SET (hard_frame_pointer_rtx,
3378					     stack_pointer_rtx);
3379      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3380      save_fp = false;
3381    }
3382
3383  gcc_assert (off == 0);
3384  insn = frame_insn (insn);
3385
3386  add_reg_note (insn, REG_INC, stack_pointer_rtx);
3387
3388  frame_allocated = nregs * UNITS_PER_WORD;
3389
3390  /* OFFSET is negative, so subtracting it adds its magnitude back in.  */
3391  return frame_allocated - offset;
3392}
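
/* Rough example: with r13-r15, blink and fp to save, the parallel built
   above becomes a single enter_s instruction covering all five stores
   plus the 20-byte stack adjustment, instead of five separate stores and
   an explicit sp update.  */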
3393
3394/* ARC epilogue, restore the registers using the leave instruction.  An
3395   initial offset is passed in OFFSET.  Besides restoring a register
3396   range, leave can also restore $blink (RESTORE_BLINK) or $fp
3397   (RESTORE_FP), and can return automatically (RETURN_P).  */
3398
3399static int
3400arc_restore_callee_leave (uint64_t gmask,
3401			  bool restore_blink,
3402			  bool restore_fp,
3403			  bool return_p,
3404			  HOST_WIDE_INT offset)
3405{
3406  int start_reg = ENTER_LEAVE_START_REG;
3407  int end_reg = ENTER_LEAVE_END_REG;
3408  int regno, indx, off, nregs;
3409  rtx insn, reg, mem;
3410  int frame_allocated = 0;
3411
3412  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3413    regno++;
3414
3415  end_reg = regno - 1;
3416  nregs = end_reg - start_reg + 1;
3417  nregs += restore_blink ? 1 : 0;
3418  nregs += restore_fp ? 1 : 0;
3419
3420  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1
3421						  + (return_p ? 1 : 0)));
3422  indx = 0;
3423
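  /* Build the PARALLEL matched by the leave multi-load pattern: an
     optional RETURN, the sp adjustment, then one load per restored
     register (blink first, then the r13.. range, then fp).  */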
3424  if (return_p)
3425    XVECEXP (insn, 0, indx++) = ret_rtx;
3426
3427  if (restore_fp)
3428    {
      /* We cannot emit a (set sp fp) here because cselib expects a
	 single sp set, not two.  Instead, fold OFFSET into the sp
	 adjustment emitted below.  */
3432      frame_allocated += offset;
3433    }
3434
3435  if (offset && !restore_fp)
3436    {
      /* This add is only emitted when we do not restore fp with the
	 leave instruction.  */
3439      frame_stack_add (offset);
3440      frame_allocated += offset;
3441      offset = 0;
3442    }
3443
3444  reg = gen_rtx_SET (stack_pointer_rtx,
3445		     plus_constant (Pmode,
3446				    stack_pointer_rtx,
3447				    offset + nregs * UNITS_PER_WORD));
3448  RTX_FRAME_RELATED_P (reg) = 1;
3449  XVECEXP (insn, 0, indx++) = reg;
3450  off = nregs * UNITS_PER_WORD;
3451
3452  if (restore_blink)
3453    {
3454      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3455      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3456						 stack_pointer_rtx,
3457						 off));
3458      XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
3459      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3460      off -= UNITS_PER_WORD;
3461    }
3462
3463  for (regno = start_reg;
3464       regno <= end_reg;
3465       regno++, indx++, off -= UNITS_PER_WORD)
3466    {
3467      reg = gen_rtx_REG (SImode, regno);
3468      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3469						  stack_pointer_rtx,
3470						  off));
3471      XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
3472      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3473      gmask = gmask & ~(1ULL << regno);
3474    }
3475
3476  if (restore_fp)
3477    {
3478      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3479						 stack_pointer_rtx,
3480						 off));
3481      XVECEXP (insn, 0, indx) = gen_rtx_SET (hard_frame_pointer_rtx, mem);
3482      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3483      off -= UNITS_PER_WORD;
3484    }
3485
3486  gcc_assert (off == 0);
3487  if (return_p)
3488    {
3489      insn = emit_jump_insn (insn);
3490      RTX_FRAME_RELATED_P (insn) = 1;
3491    }
3492  else
3493    insn = frame_insn (insn);
3494
3495  add_reg_note (insn, REG_INC, stack_pointer_rtx);
3496
3497  /* Dwarf related info.  */
3498  if (restore_fp)
3499    {
3500      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
3501      add_reg_note (insn, REG_CFA_DEF_CFA,
3502		    plus_constant (Pmode, stack_pointer_rtx,
3503				   offset + nregs * UNITS_PER_WORD));
3504    }
3505  else
3506    {
3507      add_reg_note (insn, REG_CFA_ADJUST_CFA,
3508		    gen_rtx_SET (stack_pointer_rtx,
3509				 plus_constant (Pmode, stack_pointer_rtx,
3510						nregs * UNITS_PER_WORD)));
3511    }
3512  if (restore_blink)
3513    add_reg_note (insn, REG_CFA_RESTORE,
3514		  gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
3515  for (regno = start_reg; regno <= end_reg; regno++)
3516    add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, regno));
3517
3518  frame_allocated += nregs * UNITS_PER_WORD;
3519
3520  return frame_allocated;
3521}
3522
/* Millicode thunks implementation:
   Generate calls to millicode routines for registers starting from r13
   up to r25.
   Present limitations:
   - Only one range is supported.  The remaining registers get ordinary
   st and ld instructions for stores and loads.  Hence, a gmask asking
   to store r13-r14 and r16-r25 will only generate calls to store and
   load r13 to r14, while store and load insns will be generated for
   r16 to r25 in the prologue and epilogue respectively.

   - Presently the library only supports register ranges starting from r13.
*/
3534
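/* Both the save and the restore variants below represent the millicode
   call as a PARALLEL of the register SETs together with a clobber of
   blink (or, for the sibcall epilogue, a RETURN plus an sp adjustment
   by r12); the matching patterns in arc.md emit the actual calls to the
   millicode routines.  */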
3535static int
3536arc_save_callee_milli (uint64_t gmask,
3537		       bool save_blink,
3538		       bool save_fp,
3539		       HOST_WIDE_INT offset,
3540		       HOST_WIDE_INT reg_size)
3541{
3542  int start_reg = 13;
3543  int end_reg = 25;
3544  int regno, indx, off, nregs;
3545  rtx insn, reg, mem;
3546  int frame_allocated = 0;
3547
3548  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3549    regno++;
3550
3551  end_reg = regno - 1;
3552  nregs = end_reg - start_reg + 1;
3553  gcc_assert (end_reg > 14);
3554
3555
  /* Allocate space on the stack for the registers, taking into account
     also the initial offset.  The registers will be saved using
     offsets.  N.B. OFFSET is a negative number.  */
3559  if (save_blink)
3560    {
3561      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3562      frame_allocated += frame_save_reg (reg, offset);
3563      offset = 0;
3564    }
3565
3566  if (reg_size || offset)
3567    {
3568      frame_stack_add (offset - reg_size);
3569      frame_allocated += nregs * UNITS_PER_WORD - offset;
3570      offset = 0;
3571    }
3572
  /* Start generating the millicode call.  */
3574  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
3575  indx = 0;
3576
3577  /* This is a call, we clobber blink.  */
3578  XVECEXP (insn, 0, nregs) =
3579    gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
3580
3581  for (regno = start_reg, indx = 0, off = 0;
3582       regno <= end_reg;
3583       regno++, indx++, off += UNITS_PER_WORD)
3584    {
3585      reg = gen_rtx_REG (SImode, regno);
3586      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3587						  stack_pointer_rtx,
3588						  off));
3589      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
3590      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3591      gmask = gmask & ~(1ULL << regno);
3592    }
3593  insn = frame_insn (insn);
3594
3595  /* Add DWARF info.  */
3596  for (regno = start_reg, off = 0;
3597       regno <= end_reg;
3598       regno++, off += UNITS_PER_WORD)
3599    {
3600      reg = gen_rtx_REG (SImode, regno);
3601      mem = gen_rtx_MEM (SImode, plus_constant (Pmode,
3602						stack_pointer_rtx, off));
3603      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
3606
  /* In the case of a millicode thunk, we need to restore the
     clobbered blink register.  */
3609  if (arc_must_save_return_addr (cfun))
3610    {
3611      emit_insn (gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
3612			      gen_rtx_MEM (Pmode,
3613					   plus_constant (Pmode,
3614							  stack_pointer_rtx,
3615							  reg_size))));
3616    }
3617
3618  /* Save remaining registers using st instructions.  */
3619  for (regno = 0; regno <= GMASK_LEN; regno++)
3620    {
3621      if ((gmask & (1ULL << regno)) == 0)
3622	continue;
3623
3624      reg = gen_rtx_REG (SImode, regno);
3625      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3626						  stack_pointer_rtx,
3627						  off));
3628      frame_move_inc (mem, reg, stack_pointer_rtx, 0);
3629      frame_allocated += UNITS_PER_WORD;
3630      off += UNITS_PER_WORD;
3631    }
3632
  /* Save the frame pointer if needed.  First save the FP on the stack,
     if not autosaved.  Unfortunately, we cannot add it to gmask and use
     the above loop to save it, because our ABI states that fp goes
     after all other registers are saved.  */
3637  if (save_fp)
3638    frame_allocated += frame_save_reg (hard_frame_pointer_rtx, offset);
3639
3640  /* Emit mov fp,sp.  */
3641  if (arc_frame_pointer_needed ())
3642    frame_move (hard_frame_pointer_rtx, stack_pointer_rtx);
3643
3644  return frame_allocated;
3645}
3646
3647/* Like the previous function but restore.  */
3648
3649static int
3650arc_restore_callee_milli (uint64_t gmask,
3651			  bool restore_blink,
3652			  bool restore_fp,
3653			  bool return_p,
3654			  HOST_WIDE_INT offset)
3655{
3656  int start_reg = 13;
3657  int end_reg = 25;
3658  int regno, indx, off, nregs;
3659  rtx insn, reg, mem;
3660  int frame_allocated = 0;
3661
3662  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3663    regno++;
3664
3665  end_reg = regno - 1;
3666  nregs = end_reg - start_reg + 1;
3667  gcc_assert (end_reg > 14);
3668
3669  /* Emit mov fp,sp.  */
3670  if (arc_frame_pointer_needed () && offset)
3671    {
3672      frame_move (stack_pointer_rtx, hard_frame_pointer_rtx);
3673      frame_allocated = offset;
3674      offset = 0;
3675    }
3676
3677  if (restore_fp)
3678    frame_allocated += frame_restore_reg (hard_frame_pointer_rtx, 0);
3679
3680  if (offset)
3681    {
      /* No fp involved; hence, we need to adjust the sp via an
	 add.  */
3684      frame_stack_add (offset);
3685      frame_allocated += offset;
3686      offset = 0;
3687    }
3688
  /* Start generating the millicode call.  */
3690  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc ((return_p ? 1 : 0)
3691						  + nregs + 1));
3692  indx = 0;
3693
3694  if (return_p)
3695    {
      /* Sibling call: blink is restored with the help of the value
	 held in r12.  */
3698      reg = gen_rtx_REG (Pmode, 12);
3699      XVECEXP (insn, 0, indx++) = ret_rtx;
3700      XVECEXP (insn, 0, indx++) =
3701	gen_rtx_SET (stack_pointer_rtx,
3702		     gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg));
3703      frame_allocated += UNITS_PER_WORD;
3704    }
3705  else
3706    {
3707      /* This is a call, we clobber blink.  */
3708      XVECEXP (insn, 0, nregs) =
3709	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
3710    }
3711
3712  for (regno = start_reg, off = 0;
3713       regno <= end_reg;
3714       regno++, indx++, off += UNITS_PER_WORD)
3715    {
3716      reg = gen_rtx_REG (SImode, regno);
3717      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3718						  stack_pointer_rtx,
3719						  off));
3720      XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
3721      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3722      gmask = gmask & ~(1ULL << regno);
3723    }
3724
3725  /* Restore remaining registers using LD instructions.  */
3726  for (regno = 0; regno <= GMASK_LEN; regno++)
3727    {
3728      if ((gmask & (1ULL << regno)) == 0)
3729	continue;
3730
3731      reg = gen_rtx_REG (SImode, regno);
3732      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3733						  stack_pointer_rtx,
3734						  off));
3735      rtx tmp = frame_move_inc (reg, mem, stack_pointer_rtx, 0);
3736      add_reg_note (tmp, REG_CFA_RESTORE, reg);
3737      off += UNITS_PER_WORD;
3738    }
3739
3740  /* Emit millicode call.  */
3741  if (return_p)
3742    {
3743      reg = gen_rtx_REG (Pmode, 12);
3744      frame_insn (gen_rtx_SET (reg, GEN_INT (off)));
3745      frame_allocated += off;
3746      insn = emit_jump_insn (insn);
3747      RTX_FRAME_RELATED_P (insn) = 1;
3748    }
3749  else
3750    insn = frame_insn (insn);
3751
3752  /* Add DWARF info.  */
3753  for (regno = start_reg; regno <= end_reg; regno++)
3754    {
3755      reg = gen_rtx_REG (SImode, regno);
3756      add_reg_note (insn, REG_CFA_RESTORE, reg);
    }
3759
3760  if (restore_blink && !return_p)
3761    {
3762      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3763      mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
3764						 off));
3765      insn = frame_insn (gen_rtx_SET (reg, mem));
3766      add_reg_note (insn, REG_CFA_RESTORE, reg);
3767    }
3768
3769  return frame_allocated;
3770}
3771
3772/* Set up the stack and frame pointer (if desired) for the function.  */
3773
3774void
3775arc_expand_prologue (void)
3776{
3777  int size;
3778  uint64_t gmask = cfun->machine->frame_info.gmask;
3779  struct arc_frame_info *frame = &cfun->machine->frame_info;
3780  unsigned int frame_size_to_allocate;
3781  int first_offset = 0;
3782  unsigned int fn_type = arc_compute_function_type (cfun);
3783  bool save_blink = false;
3784  bool save_fp = false;
3785  bool emit_move = false;
3786
  /* Naked functions don't have a prologue.  */
3788  if (ARC_NAKED_P (fn_type))
3789    {
3790      if (flag_stack_usage_info)
3791	current_function_static_stack_size = 0;
3792      return;
3793    }
3794
3795  /* Compute total frame size.  */
3796  size = arc_compute_frame_size ();
3797
3798  if (flag_stack_usage_info)
3799    current_function_static_stack_size = size;
3800
3801  /* Keep track of frame size to be allocated.  */
3802  frame_size_to_allocate = size;
3803
3804  /* These cases shouldn't happen.  Catch them now.  */
3805  gcc_assert (!(size == 0 && gmask));
3806
3807  /* Allocate space for register arguments if this is a variadic function.  */
3808  if (frame->pretend_size != 0)
3809    first_offset = -frame->pretend_size;
3810
  /* An IRQ using the automatic save mechanism will save the registers
     before anything we do here.  */
3813  if (ARC_AUTO_IRQ_P (fn_type)
3814      && !ARC_FAST_INTERRUPT_P (fn_type))
3815    {
3816      frame_stack_add (first_offset);
3817      first_offset = 0;
3818      arc_dwarf_emit_irq_save_regs ();
3819    }
3820
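  /* Decide what still needs to be saved explicitly: blink and fp are
     skipped when the IRQ auto-save mechanism already handles them, fp
     is not saved here for interrupt functions (it is handled at the
     end of the prologue), and EMIT_MOVE requests the fp <- sp copy for
     non-interrupt functions that need a frame pointer.  */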
3821  save_blink = arc_must_save_return_addr (cfun)
3822    && !ARC_AUTOBLINK_IRQ_P (fn_type);
3823  save_fp = arc_frame_pointer_needed () && !ARC_AUTOFP_IRQ_P (fn_type)
3824    && !ARC_INTERRUPT_P (fn_type);
3825  emit_move = arc_frame_pointer_needed () && !ARC_INTERRUPT_P (fn_type);
3826
3827  /* Use enter/leave only for non-interrupt functions.  */
3828  if (TARGET_CODE_DENSITY
3829      && TARGET_CODE_DENSITY_FRAME
3830      && !ARC_AUTOFP_IRQ_P (fn_type)
3831      && !ARC_AUTOBLINK_IRQ_P (fn_type)
3832      && !ARC_INTERRUPT_P (fn_type)
3833      && arc_enter_leave_p (gmask))
3834      frame_size_to_allocate -= arc_save_callee_enter (gmask, save_blink,
3835						       save_fp,
3836						       first_offset);
3837  else if (frame->millicode_end_reg > 14)
3838    frame_size_to_allocate -= arc_save_callee_milli (gmask, save_blink,
3839						     save_fp,
3840						     first_offset,
3841						     frame->reg_size);
3842  else
3843    frame_size_to_allocate -= arc_save_callee_saves (gmask, save_blink, save_fp,
3844						     first_offset, emit_move);
3845
3846  /* Check if we need to save the ZOL machinery.  */
3847  if (arc_lpcwidth != 0 && arc_must_save_register (LP_COUNT, cfun, true))
3848    {
3849      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
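      /* Read LP_START and LP_END from their aux registers into r0 (an
	 LR via unspec_volatile) and push them, then push LP_COUNT
	 itself.  */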
3850      emit_insn (gen_rtx_SET (reg0,
3851			      gen_rtx_UNSPEC_VOLATILE
3852			      (Pmode, gen_rtvec (1, GEN_INT (AUX_LP_START)),
3853			       VUNSPEC_ARC_LR)));
3854      frame_size_to_allocate -= push_reg (reg0);
3855      emit_insn (gen_rtx_SET (reg0,
3856			      gen_rtx_UNSPEC_VOLATILE
3857			      (Pmode, gen_rtvec (1, GEN_INT (AUX_LP_END)),
3858			       VUNSPEC_ARC_LR)));
3859      frame_size_to_allocate -= push_reg (reg0);
3860      emit_move_insn (reg0, gen_rtx_REG (SImode, LP_COUNT));
3861      frame_size_to_allocate -= push_reg (reg0);
3862    }
3863
3864  /* Save AUX regs used by FPX machinery.  */
3865  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R41_REG : R40_REG,
3866			      cfun, TARGET_DPFP))
3867    {
3868      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
3869      int i;
3870
3871      for (i = 0; i < 4; i++)
3872	{
3873	  emit_insn (gen_rtx_SET (reg0,
3874				  gen_rtx_UNSPEC_VOLATILE
3875				  (Pmode, gen_rtvec (1, GEN_INT (AUX_DPFP_START
3876								 + i)),
3877				   VUNSPEC_ARC_LR)));
3878	  frame_size_to_allocate -= push_reg (reg0);
3879	}
3880    }
3881
  /* Save ARC600's MUL64 registers.  */
3883  if (arc_must_save_register (R58_REG, cfun, true))
3884    frame_size_to_allocate -= arc_save_callee_saves (3ULL << 58,
3885						     false, false, 0, false);
3886
3887  if (arc_frame_pointer_needed () && ARC_INTERRUPT_P (fn_type))
3888    {
3889      /* Just save fp at the end of the saving context.  */
3890      frame_size_to_allocate -=
3891	arc_save_callee_saves (0, false, !ARC_AUTOFP_IRQ_P (fn_type), 0, true);
3892    }
3893
3894  /* Allocate the stack frame.  */
3895  if (frame_size_to_allocate > 0)
3896    frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
3897
3898  /* Emit a blockage to avoid delay slot scheduling.  */
3899  emit_insn (gen_blockage ());
3900}
3901
3902/* Return the register number of the register holding the return address
3903   for a function of type TYPE.  */
3904
3905static int
3906arc_return_address_register (unsigned int fn_type)
3907{
3908  int regno = 0;
3909
3910  if (ARC_INTERRUPT_P (fn_type))
3911    {
3912      if ((fn_type & (ARC_FUNCTION_ILINK1 | ARC_FUNCTION_FIRQ)) != 0)
3913	regno = ILINK1_REG;
3914      else if ((fn_type & ARC_FUNCTION_ILINK2) != 0)
3915	regno = ILINK2_REG;
3916      else
3917	gcc_unreachable ();
3918    }
3919  else if (ARC_NORMAL_P (fn_type) || ARC_NAKED_P (fn_type))
3920    regno = RETURN_ADDR_REGNUM;
3921
3922  gcc_assert (regno != 0);
3923  return regno;
3924}
3925
3926/* Do any necessary cleanup after a function to restore stack, frame,
3927   and regs.  */
3928
3929void
3930arc_expand_epilogue (int sibcall_p)
3931{
3932  int size;
3933  unsigned int fn_type = arc_compute_function_type (cfun);
3934  unsigned int size_to_deallocate;
3935  int restored;
3936  int can_trust_sp_p = !cfun->calls_alloca;
3937  int first_offset;
3938  bool restore_fp = arc_frame_pointer_needed () && !ARC_AUTOFP_IRQ_P (fn_type);
3939  bool restore_blink = arc_must_save_return_addr (cfun)
3940    && !ARC_AUTOBLINK_IRQ_P (fn_type);
3941  uint64_t gmask = cfun->machine->frame_info.gmask;
3942  bool return_p = !sibcall_p && fn_type == ARC_FUNCTION_NORMAL
3943		   && !cfun->machine->frame_info.pretend_size;
3944  struct arc_frame_info *frame = &cfun->machine->frame_info;
3945
  /* Naked functions don't have an epilogue.  */
3947  if (ARC_NAKED_P (fn_type))
3948    return;
3949
3950  size = arc_compute_frame_size ();
3951  size_to_deallocate = size;
3952
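  /* FIRST_OFFSET is the part of the frame between the stack pointer and
     the register save area (local variables and outgoing arguments); it
     has to be deallocated before the saved registers can be reached.  */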
3953  first_offset = size - (frame->pretend_size + frame->reg_size
3954			 + frame->extra_size);
3955
3956  if (!can_trust_sp_p)
3957    gcc_assert (arc_frame_pointer_needed ());
3958
3959  /* Emit a blockage to avoid/flush all pending sp operations.  */
3960  if (size)
3961    emit_insn (gen_blockage ());
3962
3963  if (ARC_INTERRUPT_P (fn_type) && restore_fp)
3964    {
3965      /* We need to restore FP before any SP operation in an
3966	 interrupt.  */
3967      size_to_deallocate -= arc_restore_callee_saves (0, false,
3968						      restore_fp,
3969						      first_offset,
3970						      size_to_deallocate);
3971      restore_fp = false;
3972      first_offset = 0;
3973    }
3974
  /* Restore ARC600's MUL64 registers.  */
3976  if (arc_must_save_register (R58_REG, cfun, true))
3977    {
3978      rtx insn;
3979      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
3980      rtx reg1 = gen_rtx_REG (SImode, R1_REG);
3981      size_to_deallocate -= pop_reg (reg0);
3982      size_to_deallocate -= pop_reg (reg1);
3983
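      /* Multiplying the saved low word by one reloads it into the
	 MUL64 result registers; the following SR to the MULHI aux
	 register restores the high word.  */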
3984      insn = emit_insn (gen_mulu64 (reg0, const1_rtx));
3985      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, R58_REG));
3986      RTX_FRAME_RELATED_P (insn) = 1;
3987      emit_insn (gen_arc600_stall ());
3988      insn = emit_insn (gen_rtx_UNSPEC_VOLATILE
3989			(VOIDmode, gen_rtvec (2, reg1, GEN_INT (AUX_MULHI)),
3990			 VUNSPEC_ARC_SR));
3991      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, R59_REG));
3992      RTX_FRAME_RELATED_P (insn) = 1;
3993    }
3994
3995  /* Restore AUX-regs used by FPX machinery.  */
3996  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R41_REG : R40_REG,
3997			      cfun, TARGET_DPFP))
3998    {
3999      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
4000      int i;
4001
4002      for (i = 0; i < 4; i++)
4003	{
4004	  size_to_deallocate -= pop_reg (reg0);
4005	  emit_insn (gen_rtx_UNSPEC_VOLATILE
4006		     (VOIDmode, gen_rtvec (2, reg0, GEN_INT (AUX_DPFP_START
4007							     + i)),
4008		      VUNSPEC_ARC_SR));
4009	}
4010    }
4011
4012  /* Check if we need to restore the ZOL machinery.  */
  if (arc_lpcwidth != 0 && arc_must_save_register (LP_COUNT, cfun, true))
4014    {
4015      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
4016
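      /* Pop in the reverse order of the prologue: LP_COUNT first, then
	 LP_END and LP_START are written back to their aux registers
	 via SR.  */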
4017      size_to_deallocate -= pop_reg (reg0);
4018      emit_move_insn (gen_rtx_REG (SImode, LP_COUNT), reg0);
4019
4020      size_to_deallocate -= pop_reg (reg0);
4021      emit_insn (gen_rtx_UNSPEC_VOLATILE
4022		 (VOIDmode, gen_rtvec (2, reg0, GEN_INT (AUX_LP_END)),
4023		  VUNSPEC_ARC_SR));
4024
4025      size_to_deallocate -= pop_reg (reg0);
4026      emit_insn (gen_rtx_UNSPEC_VOLATILE
4027		 (VOIDmode, gen_rtvec (2, reg0, GEN_INT (AUX_LP_START)),
4028		  VUNSPEC_ARC_SR));
4029    }
4030
4031  if (TARGET_CODE_DENSITY
4032      && TARGET_CODE_DENSITY_FRAME
4033      && !ARC_AUTOFP_IRQ_P (fn_type)
4034      && !ARC_AUTOBLINK_IRQ_P (fn_type)
4035      && !ARC_INTERRUPT_P (fn_type)
4036      && arc_enter_leave_p (gmask))
4037    {
4038      /* Using leave instruction.  */
4039      size_to_deallocate -= arc_restore_callee_leave (gmask, restore_blink,
4040						      restore_fp,
4041						      return_p,
4042						      first_offset);
4043      if (return_p)
4044	{
4045	  gcc_assert (size_to_deallocate == 0);
4046	  return;
4047	}
4048    }
4049  else if (frame->millicode_end_reg > 14)
4050    {
4051      /* Using millicode calls.  */
4052      size_to_deallocate -= arc_restore_callee_milli (gmask, restore_blink,
4053						      restore_fp,
4054						      return_p,
4055						      first_offset);
4056      if (return_p)
4057	{
4058	  gcc_assert (size_to_deallocate == 0);
4059	  return;
4060	}
4061    }
4062  else
4063    size_to_deallocate -= arc_restore_callee_saves (gmask, restore_blink,
4064						    restore_fp,
4065						    first_offset,
4066						    size_to_deallocate);
4067
  /* Keep track of how much of the frame we have already deallocated.
     It makes the following a lot more readable.  */
4070  restored = size - size_to_deallocate;
4071
4072  if (size > restored)
4073    frame_stack_add (size - restored);
4074
4075  /* For frames that use __builtin_eh_return, the register defined by
4076     EH_RETURN_STACKADJ_RTX is set to 0 for all standard return paths.
4077     On eh_return paths however, the register is set to the value that
4078     should be added to the stack pointer in order to restore the
4079     correct stack pointer for the exception handling frame.
4080
4081     For ARC we are going to use r2 for EH_RETURN_STACKADJ_RTX, add
4082     this onto the stack for eh_return frames.  */
4083  if (crtl->calls_eh_return)
4084    emit_insn (gen_add2_insn (stack_pointer_rtx,
4085			      EH_RETURN_STACKADJ_RTX));
4086
4087  /* Emit the return instruction.  */
4088  if (ARC_INTERRUPT_P (fn_type))
4089    {
4090      rtx ra = gen_rtx_REG (Pmode, arc_return_address_register (fn_type));
4091
4092      if (TARGET_V2)
4093	emit_jump_insn (gen_rtie ());
4094      else if (TARGET_ARC700)
4095	emit_jump_insn (gen_rtie ());
4096      else
4097	emit_jump_insn (gen_arc600_rtie (ra));
4098    }
4099  else if (sibcall_p == FALSE)
4100    emit_jump_insn (gen_simple_return ());
4101}
4102
/* Helper for {push/pop}_multi_operand: check if rtx OP is a suitable
   construct to match either the enter or the leave instruction.  Which
   one is selected by the PUSH_P argument.  */
4106
4107bool
4108arc_check_multi (rtx op, bool push_p)
4109{
4110  HOST_WIDE_INT len = XVECLEN (op, 0);
4111  unsigned int regno, i, start;
4112  unsigned int memp = push_p ? 0 : 1;
4113  rtx elt;
4114
4115  if (len <= 1)
4116    return false;
4117
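  /* Element 0 is the sp adjustment (for a leave with return it is the
     RETURN, followed by the sp adjustment); the remaining elements
     must be the register/memory SETs checked below.  */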
4118  start = 1;
4119  elt = XVECEXP (op, 0, 0);
4120  if (!push_p && GET_CODE (elt) == RETURN)
4121    start = 2;
4122
4123  for (i = start, regno = ENTER_LEAVE_START_REG; i < len; i++, regno++)
4124    {
4125      rtx elt = XVECEXP (op, 0, i);
4126      rtx reg, mem, addr;
4127
4128      if (GET_CODE (elt) != SET)
4129	return false;
4130      mem = XEXP (elt, memp);
4131      reg = XEXP (elt, 1 - memp);
4132
4133      if (!REG_P (reg)
4134	  || !MEM_P (mem))
4135	return false;
4136
      /* Check for blink: when present it occupies the first register
	 slot, and the r13.. range follows, so prime REGNO accordingly.  */
4138      if (REGNO (reg) == RETURN_ADDR_REGNUM
4139	  && i == start)
4140	regno = 12;
4141      else if (REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
4142	++i;
4143      else if (REGNO (reg) != regno)
4144	return false;
4145
4146      addr = XEXP (mem, 0);
4147      if (GET_CODE (addr) == PLUS)
4148	{
4149	  if (!rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
4150	      || !CONST_INT_P (XEXP (addr, 1)))
4151	    return false;
4152	}
4153      else
4154	{
4155	  if (!rtx_equal_p (stack_pointer_rtx, addr))
4156	    return false;
4157	}
4158    }
4159  return true;
4160}
4161
/* Store SOURCE, the new return address, into the stack slot that holds
   the saved return address, for use by __builtin_eh_return.  The new
   return address will be written to this location in order to redirect
   the return to the exception handler.  Our ABI says the blink is
   pushed first on the stack, followed by an unknown number of register
   saves, and finally by fp.  Hence we cannot use the EH_RETURN_ADDRESS
   macro as the stack is not finalized.  */
4169
4170void
4171arc_eh_return_address_location (rtx source)
4172{
4173  rtx mem;
4174  int offset;
4175  struct arc_frame_info *afi;
4176
4177  arc_compute_frame_size ();
4178  afi = &cfun->machine->frame_info;
4179
4180  gcc_assert (crtl->calls_eh_return);
4181  gcc_assert (afi->save_return_addr);
4182  gcc_assert (afi->extra_size >= 4);
4183
4184  /* The '-4' removes the size of the return address, which is
4185     included in the 'extra_size' field.  */
4186  offset = afi->reg_size + afi->extra_size - 4;
4187  mem = gen_frame_mem (Pmode,
4188		       plus_constant (Pmode, hard_frame_pointer_rtx, offset));
4189
  /* The following should not be needed, and is really a hack.  The
4191     issue being worked around here is that the DSE (Dead Store
4192     Elimination) pass will remove this write to the stack as it sees
4193     a single store and no corresponding read.  The read however
4194     occurs in the epilogue code, which is not added into the function
4195     rtl until a later pass.  So, at the time of DSE, the decision to
4196     remove this store seems perfectly sensible.  Marking the memory
4197     address as volatile obviously has the effect of preventing DSE
4198     from removing the store.  */
4199  MEM_VOLATILE_P (mem) = true;
4200  emit_move_insn (mem, source);
4201}
4202
4203/* PIC */
4204
4205/* Helper to generate unspec constant.  */
4206
4207static rtx
4208arc_unspec_offset (rtx loc, int unspec)
4209{
4210  return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, loc),
4211					       unspec));
4212}
4213
4214/* !TARGET_BARREL_SHIFTER support.  */
4215/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
4216   kind of shift.  */
4217
4218void
4219emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
4220{
4221  rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
4222  rtx pat
4223    = ((shift4_operator (shift, SImode) ?  gen_shift_si3 : gen_shift_si3_loop)
4224	(op0, op1, op2, shift));
4225  emit_insn (pat);
4226}
4227
4228/* Output the assembler code for doing a shift.
4229   We go to a bit of trouble to generate efficient code as the ARC601 only has
4230   single bit shifts.  This is taken from the h8300 port.  We only have one
4231   mode of shifting and can't access individual bytes like the h8300 can, so
4232   this is greatly simplified (at the expense of not generating hyper-
4233   efficient code).
4234
4235   This function is not used if the variable shift insns are present.  */
4236
4237/* FIXME:  This probably can be done using a define_split in arc.md.
4238   Alternately, generate rtx rather than output instructions.  */
4239
4240const char *
4241output_shift (rtx *operands)
4242{
4244  rtx shift = operands[3];
4245  machine_mode mode = GET_MODE (shift);
4246  enum rtx_code code = GET_CODE (shift);
4247  const char *shift_one;
4248
4249  gcc_assert (mode == SImode);
4250
4251  switch (code)
4252    {
4253    case ASHIFT:   shift_one = "add %0,%1,%1"; break;
4254    case ASHIFTRT: shift_one = "asr %0,%1"; break;
4255    case LSHIFTRT: shift_one = "lsr %0,%1"; break;
4256    default:       gcc_unreachable ();
4257    }
4258
4259  if (GET_CODE (operands[2]) != CONST_INT)
4260    {
4261      output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
4262      goto shiftloop;
4263    }
4264  else
4265    {
4266      int n;
4267
4268      n = INTVAL (operands[2]);
4269
4270      /* Only consider the lower 5 bits of the shift count.  */
4271      n = n & 0x1f;
4272
4273      /* First see if we can do them inline.  */
4274      /* ??? We could get better scheduling & shorter code (using short insns)
4275	 by using splitters.  Alas, that'd be even more verbose.  */
4276      if (code == ASHIFT && n <= 9 && n > 2
4277	  && dest_reg_operand (operands[4], SImode))
4278	{
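	  /* add3 a,b,c computes a = b + (c << 3) and add2 shifts by
	     two, so the initial mov/add3 pair shifts the source left
	     by three and each further add3 adds three more bit
	     positions; a trailing add2 or add handles the remainder.  */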
4279	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
	  for (n -= 3; n >= 3; n -= 3)
4281	    output_asm_insn ("add3 %0,%4,%0", operands);
4282	  if (n == 2)
4283	    output_asm_insn ("add2 %0,%4,%0", operands);
4284	  else if (n)
4285	    output_asm_insn ("add %0,%0,%0", operands);
4286	}
4287      else if (n <= 4)
4288	{
4289	  while (--n >= 0)
4290	    {
4291	      output_asm_insn (shift_one, operands);
4292	      operands[1] = operands[0];
4293	    }
4294	}
4295      /* See if we can use a rotate/and.  */
4296      else if (n == BITS_PER_WORD - 1)
4297	{
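	  /* Shifting by BITS_PER_WORD - 1 leaves only one significant
	     source bit: isolate bit 0 and rotate it into the top for
	     ASHIFT, or copy bit 31 into the carry (add.f 0,x,x) and
	     either broadcast it with sbc (ASHIFTRT) or rotate it into
	     bit 0 with rlc (LSHIFTRT).  */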
4298	  switch (code)
4299	    {
4300	    case ASHIFT :
4301	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
4302	      break;
4303	    case ASHIFTRT :
4304	      /* The ARC doesn't have a rol insn.  Use something else.  */
4305	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
4306	      break;
4307	    case LSHIFTRT :
4308	      /* The ARC doesn't have a rol insn.  Use something else.  */
4309	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
4310	      break;
4311	    default:
4312	      break;
4313	    }
4314	}
4315      else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
4316	{
4317	  switch (code)
4318	    {
4319	    case ASHIFT :
4320	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
4321	      break;
4322	    case ASHIFTRT :
4323#if 1 /* Need some scheduling comparisons.  */
4324	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
4325			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
4326#else
4327	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
4328			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
4329#endif
4330	      break;
4331	    case LSHIFTRT :
4332#if 1
4333	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
4334			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
4335#else
4336	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
4337			       "and %0,%0,1\n\trlc %0,%0", operands);
4338#endif
4339	      break;
4340	    default:
4341	      break;
4342	    }
4343	}
4344      else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
4345	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
4346			 operands);
4347      /* Must loop.  */
4348      else
4349	{
4350	  operands[2] = GEN_INT (n);
4351	  output_asm_insn ("mov.f lp_count, %2", operands);
4352
4353	shiftloop:
4354	    {
4355	      output_asm_insn ("lpnz\t2f", operands);
4356	      output_asm_insn (shift_one, operands);
4357	      output_asm_insn ("nop", operands);
4358	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
4359		       ASM_COMMENT_START);
4360	    }
4361	}
4362    }
4363
4364  return "";
4365}
4366
4367/* Nested function support.  */
4368
4369/* Output assembler code for a block containing the constant parts of
4370   a trampoline, leaving space for variable parts.  A trampoline looks
4371   like this:
4372
4373   ld_s r12,[pcl,8]
4374   ld   r11,[pcl,12]
4375   j_s [r12]
4376   .word function's address
4377   .word static chain value
4378
4379*/
4380
4381static void
4382arc_asm_trampoline_template (FILE *f)
4383{
4384  asm_fprintf (f, "\tld_s\t%s,[pcl,8]\n", ARC_TEMP_SCRATCH_REG);
4385  asm_fprintf (f, "\tld\t%s,[pcl,12]\n", reg_names[STATIC_CHAIN_REGNUM]);
4386  asm_fprintf (f, "\tj_s\t[%s]\n", ARC_TEMP_SCRATCH_REG);
4387  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4388  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4389}
4390
4391/* Emit RTL insns to initialize the variable parts of a trampoline.
4392   FNADDR is an RTX for the address of the function's pure code.  CXT
4393   is an RTX for the static chain value for the function.
4394
   The fastest trampoline to execute for trampolines within +-8KB of CXT
   would be:

   add2 r11,pcl,s12
   j [limm]           0x20200f80 limm

   and that would also be faster to write to the stack by computing
   the offset from CXT to TRAMP at compile time.  However, it would
   really be better to get rid of the high cost of cache invalidation
   when generating trampolines, which requires that the code part of
   trampolines stays constant, and additionally either making sure
   that no executable code but trampolines is on the stack and that no
   icache entries linger for the area of the stack from before the
   stack was allocated, or allocating trampolines in trampoline-only
   cache lines or from a special pool of pre-allocated trampolines.  */
4411
4412static void
4413arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
4414{
4415  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4416
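  /* Copy the fixed code template, then fill in the two .word slots:
     the function address at offset 8 and the static chain at offset
     12, and finally flush the instruction cache over the trampoline.  */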
4417  emit_block_move (tramp, assemble_trampoline_template (),
4418		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4419  emit_move_insn (adjust_address (tramp, SImode, 8), fnaddr);
4420  emit_move_insn (adjust_address (tramp, SImode, 12), cxt);
4421  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4422		     LCT_NORMAL, VOIDmode, XEXP (tramp, 0), Pmode,
4423		     plus_constant (Pmode, XEXP (tramp, 0), TRAMPOLINE_SIZE),
4424		     Pmode);
4425}
4426
4427/* Add the given function declaration to emit code in JLI section.  */
4428
4429static void
4430arc_add_jli_section (rtx pat)
4431{
4432  const char *name;
4433  tree attrs;
4434  arc_jli_section *sec = arc_jli_sections, *new_section;
4435  tree decl = SYMBOL_REF_DECL (pat);
4436
4437  if (!pat)
4438    return;
4439
4440  if (decl)
4441    {
4442      /* For fixed locations do not generate the jli table entry.  It
4443	 should be provided by the user as an asm file.  */
4444      attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4445      if (lookup_attribute ("jli_fixed", attrs))
4446	return;
4447    }
4448
4449  name = XSTR (pat, 0);
4450
4451  /* Don't insert the same symbol twice.  */
4452  while (sec != NULL)
4453    {
      if (strcmp (name, sec->name) == 0)
4455	return;
4456      sec = sec->next;
4457    }
4458
4459  /* New name, insert it.  */
4460  new_section = (arc_jli_section *) xmalloc (sizeof (arc_jli_section));
4461  gcc_assert (new_section != NULL);
4462  new_section->name = name;
4463  new_section->next = arc_jli_sections;
4464  arc_jli_sections = new_section;
4465}
4466
/* This is set briefly to 1 when we output a ".as" address modifier, and then
   reset when we output the scaled address.  */
4469static int output_scaled = 0;
4470
4471/* Set when we force sdata output.  */
4472static int output_sdata = 0;
4473
4474/* Print operand X (an rtx) in assembler syntax to file FILE.
4475   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4476   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4477/* In final.c:output_asm_insn:
4478    'l' : label
4479    'a' : address
4480    'c' : constant address if CONSTANT_ADDRESS_P
4481    'n' : negative
4482   Here:
4483    'Z': log2(x+1)-1
4484    'z': log2
4485    'M': log2(~x)
    'p': bit position of lsb
4487    's': size of bit field
4488    '#': condbranch delay slot suffix
4489    '*': jump delay slot suffix
4490    '?' : nonjump-insn suffix for conditional execution or short instruction
4491    '!' : jump / call suffix for conditional execution or short instruction
    '`': fold constant inside unary operator, re-recognize, and emit.
4493    'd'
4494    'D'
4495    'R': Second word
4496    'S': JLI instruction
4497    'j': used by mov instruction to properly emit jli related labels.
4498    'B': Branch comparison operand - suppress sda reference
4499    'H': Most significant word
4500    'L': Least significant word
4501    'A': ASCII decimal representation of floating point value
4502    'U': Load/store update or scaling indicator
4503    'V': cache bypass indicator for volatile
4504    'P'
4505    'F'
4506    '^'
4507    'O': Operator
4508    'o': original symbol - no @ prepending.  */
4509
4510void
4511arc_print_operand (FILE *file, rtx x, int code)
4512{
4513  switch (code)
4514    {
4515    case 'Z':
4516      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%d", exact_log2 (INTVAL (x) + 1) - 1);
4518      else
4519	output_operand_lossage ("invalid operand to %%Z code");
4520
4521      return;
4522
4523    case 'z':
4524      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%d", exact_log2 (INTVAL (x) & 0xffffffff));
4526      else
4527	output_operand_lossage ("invalid operand to %%z code");
4528
4529      return;
4530
4531    case 'c':
4532      if (GET_CODE (x) == CONST_INT)
4533        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) );
4534      else
4535        output_operand_lossage ("invalid operands to %%c code");
4536
4537      return;
4538
4539    case 'M':
4540      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%d", exact_log2 (~INTVAL (x)));
4542      else
4543	output_operand_lossage ("invalid operand to %%M code");
4544
4545      return;
4546
4547    case 'p':
4548      if (GET_CODE (x) == CONST_INT)
4549	fprintf (file, "%d", exact_log2 (INTVAL (x) & -INTVAL (x)));
4550      else
4551	output_operand_lossage ("invalid operand to %%p code");
4552      return;
4553
4554    case 's':
4555      if (GET_CODE (x) == CONST_INT)
4556	{
4557	  HOST_WIDE_INT i = INTVAL (x);
4558	  HOST_WIDE_INT s = exact_log2 (i & -i);
4559	  fprintf (file, "%d", exact_log2 (((0xffffffffUL & i) >> s) + 1));
4560	}
4561      else
4562	output_operand_lossage ("invalid operand to %%s code");
4563      return;
4564
4565    case '#' :
4566      /* Conditional branches depending on condition codes.
4567	 Note that this is only for branches that were known to depend on
4568	 condition codes before delay slot scheduling;
4569	 out-of-range brcc / bbit expansions should use '*'.
4570	 This distinction is important because of the different
4571	 allowable delay slot insns and the output of the delay suffix
4572	 for TARGET_AT_DBR_COND_EXEC.  */
4573    case '*' :
4574      /* Unconditional branches / branches not depending on condition codes.
4575	 This could also be a CALL_INSN.
4576	 Output the appropriate delay slot suffix.  */
4577      if (final_sequence && final_sequence->len () != 1)
4578	{
4579	  rtx_insn *jump = final_sequence->insn (0);
4580	  rtx_insn *delay = final_sequence->insn (1);
4581
4582	  /* For TARGET_PAD_RETURN we might have grabbed the delay insn.  */
4583	  if (delay->deleted ())
4584	    return;
4585	  if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
4586	    fputs (INSN_FROM_TARGET_P (delay) ? ".d"
4587		   : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
4588		   : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
4589		   : ".nd",
4590		   file);
4591	  else
4592	    fputs (".d", file);
4593	}
4594      return;
4595    case '?' : /* with leading "." */
4596    case '!' : /* without leading "." */
4597      /* This insn can be conditionally executed.  See if the ccfsm machinery
4598	 says it should be conditionalized.
4599	 If it shouldn't, we'll check the compact attribute if this insn
4600	 has a short variant, which may be used depending on code size and
4601	 alignment considerations.  */
4602      if (current_insn_predicate)
4603	arc_ccfsm_current.cc
4604	  = get_arc_condition_code (current_insn_predicate);
4605      if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
4606	{
4607	  /* Is this insn in a delay slot sequence?  */
4608	  if (!final_sequence || XVECLEN (final_sequence, 0) < 2
4609	      || current_insn_predicate
4610	      || CALL_P (final_sequence->insn (0))
4611	      || simplejump_p (final_sequence->insn (0)))
4612	    {
4613	      /* This insn isn't in a delay slot sequence, or conditionalized
4614		 independently of its position in a delay slot.  */
4615	      fprintf (file, "%s%s",
4616		       code == '?' ? "." : "",
4617		       arc_condition_codes[arc_ccfsm_current.cc]);
4618	      /* If this is a jump, there are still short variants.  However,
4619		 only beq_s / bne_s have the same offset range as b_s,
4620		 and the only short conditional returns are jeq_s and jne_s.  */
4621	      if (code == '!'
4622		  && (arc_ccfsm_current.cc == ARC_CC_EQ
4623		      || arc_ccfsm_current.cc == ARC_CC_NE
4624		      || 0 /* FIXME: check if branch in 7 bit range.  */))
4625		output_short_suffix (file);
4626	    }
4627	  else if (code == '!') /* Jump with delay slot.  */
4628	    fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
4629	  else /* An Instruction in a delay slot of a jump or call.  */
4630	    {
4631	      rtx jump = XVECEXP (final_sequence, 0, 0);
4632	      rtx insn = XVECEXP (final_sequence, 0, 1);
4633
4634	      /* If the insn is annulled and is from the target path, we need
4635		 to inverse the condition test.  */
4636	      if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
4637		{
4638		  if (INSN_FROM_TARGET_P (insn))
4639		    fprintf (file, "%s%s",
4640			     code == '?' ? "." : "",
4641			     arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
4642		  else
4643		    fprintf (file, "%s%s",
4644			     code == '?' ? "." : "",
4645			     arc_condition_codes[arc_ccfsm_current.cc]);
4646		  if (arc_ccfsm_current.state == 5)
4647		    arc_ccfsm_current.state = 0;
4648		}
4649	      else
4650		/* This insn is executed for either path, so don't
4651		   conditionalize it at all.  */
4652		output_short_suffix (file);
4653
4654	    }
4655	}
4656      else
4657	output_short_suffix (file);
4658      return;
    case '`':
4660      /* FIXME: fold constant inside unary operator, re-recognize, and emit.  */
4661      gcc_unreachable ();
4662    case 'd' :
4663      fputs (arc_condition_codes[get_arc_condition_code (x)], file);
4664      return;
4665    case 'D' :
4666      fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
4667				 (get_arc_condition_code (x))],
4668	     file);
4669      return;
4670    case 'R' :
4671      /* Write second word of DImode or DFmode reference,
4672	 register or memory.  */
4673      if (GET_CODE (x) == REG)
4674	fputs (reg_names[REGNO (x)+1], file);
4675      else if (GET_CODE (x) == MEM)
4676	{
4677	  fputc ('[', file);
4678
4679	  /* Handle possible auto-increment.  For PRE_INC / PRE_DEC /
4680	    PRE_MODIFY, we will have handled the first word already;
4681	    For POST_INC / POST_DEC / POST_MODIFY, the access to the
4682	    first word will be done later.  In either case, the access
4683	    to the first word will do the modify, and we only have
4684	    to add an offset of four here.  */
4685	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
4686	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
4687	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
4688	      || GET_CODE (XEXP (x, 0)) == POST_INC
4689	      || GET_CODE (XEXP (x, 0)) == POST_DEC
4690	      || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
4691	    output_address (VOIDmode,
4692			    plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
4693	  else if (output_scaled)
4694	    {
4695	      rtx addr = XEXP (x, 0);
4696	      int size = GET_MODE_SIZE (GET_MODE (x));
4697
4698	      output_address (VOIDmode,
4699			      plus_constant (Pmode, XEXP (addr, 0),
4700					     ((INTVAL (XEXP (addr, 1)) + 4)
4701					      >> (size == 2 ? 1 : 2))));
4702	      output_scaled = 0;
4703	    }
4704	  else
4705	    output_address (VOIDmode,
4706			    plus_constant (Pmode, XEXP (x, 0), 4));
4707	  fputc (']', file);
4708	}
4709      else
4710	output_operand_lossage ("invalid operand to %%R code");
4711      return;
4712    case 'j':
4713    case 'S' :
4714      if (GET_CODE (x) == SYMBOL_REF
4715	  && arc_is_jli_call_p (x))
4716	{
4717	  if (SYMBOL_REF_DECL (x))
4718	    {
4719	      tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
4720			    ? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
4721			    : NULL_TREE);
4722	      if (lookup_attribute ("jli_fixed", attrs))
4723		{
4724		  /* No special treatment for jli_fixed functions.  */
4725		  if (code == 'j')
4726		    break;
4727		  fprintf (file, HOST_WIDE_INT_PRINT_DEC "\t; @",
4728			   TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
4729		  assemble_name (file, XSTR (x, 0));
4730		  return;
4731		}
4732	    }
4733	  fprintf (file, "@__jli.");
4734	  assemble_name (file, XSTR (x, 0));
4735	  if (code == 'j')
4736	    arc_add_jli_section (x);
4737	  return;
4738	}
4739      if (GET_CODE (x) == SYMBOL_REF
4740	  && arc_is_secure_call_p (x))
4741	{
4742	  /* No special treatment for secure functions.  */
4743	  if (code == 'j' )
4744	    break;
4745	  tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
4746			? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
4747			: NULL_TREE);
4748	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "\t; @",
4749		   TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
4750	  assemble_name (file, XSTR (x, 0));
4751	  return;
4752	}
4753      break;
4754    case 'B' /* Branch or other LIMM ref - must not use sda references.  */ :
4755      if (CONSTANT_P (x))
4756	{
4757	  output_addr_const (file, x);
4758	  return;
4759	}
4760      break;
4761    case 'H' :
4762    case 'L' :
4763      if (GET_CODE (x) == REG)
4764	{
4765	  /* L = least significant word, H = most significant word.  */
4766	  if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
4767	    fputs (reg_names[REGNO (x)], file);
4768	  else
4769	    fputs (reg_names[REGNO (x)+1], file);
4770	}
4771      else if (GET_CODE (x) == CONST_INT
4772	       || GET_CODE (x) == CONST_DOUBLE)
4773	{
4774	  rtx first, second, word;
4775
4776	  split_double (x, &first, &second);
4777
	  if (WORDS_BIG_ENDIAN == 0)
4779	    word = (code == 'L' ? first : second);
4780	  else
4781	    word = (code == 'L' ? second : first);
4782
4783	  fprintf (file, "0x%08" PRIx32, ((uint32_t) INTVAL (word)));
4784	}
4785      else
4786	output_operand_lossage ("invalid operand to %%H/%%L code");
4787      return;
4788    case 'A' :
4789      {
4790	char str[30];
4791
4792	gcc_assert (GET_CODE (x) == CONST_DOUBLE
4793		    && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
4794
4795	real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
4796	fprintf (file, "%s", str);
4797	return;
4798      }
4799    case 'U' :
4800      /* Output a load/store with update indicator if appropriate.  */
4801      if (GET_CODE (x) == MEM)
4802	{
4803	  rtx addr = XEXP (x, 0);
4804	  switch (GET_CODE (addr))
4805	    {
4806	    case PRE_INC: case PRE_DEC: case PRE_MODIFY:
4807	      fputs (".a", file); break;
4808	    case POST_INC: case POST_DEC: case POST_MODIFY:
4809	      fputs (".ab", file); break;
4810	    case PLUS:
4811	      /* Are we using a scaled index?  */
4812	      if (GET_CODE (XEXP (addr, 0)) == MULT)
4813		fputs (".as", file);
4814	      /* Can we use a scaled offset?  */
4815	      else if (CONST_INT_P (XEXP (addr, 1))
4816		       && GET_MODE_SIZE (GET_MODE (x)) > 1
4817		       && (!(INTVAL (XEXP (addr, 1))
4818			     & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
4819		       /* Does it make a difference?  */
4820		       && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)),
4821					   GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
4822		{
4823		  fputs (".as", file);
4824		  output_scaled = 1;
4825		}
4826	      break;
4827	    case SYMBOL_REF:
4828	    case CONST:
4829	      if (legitimate_small_data_address_p (addr, GET_MODE (x))
4830		  && GET_MODE_SIZE (GET_MODE (x)) > 1)
4831		{
4832		  int align = get_symbol_alignment (addr);
4833		  int mask = 0;
4834		  switch (GET_MODE (x))
4835		    {
4836		    case E_HImode:
4837		      mask = 1;
4838		      break;
4839		    default:
4840		      mask = 3;
4841		      break;
4842		    }
4843		  if (align && ((align & mask) == 0))
4844		    fputs (".as", file);
4845		}
4846	      break;
4847	    case REG:
4848	      break;
4849	    default:
4850	      gcc_assert (CONSTANT_P (addr)); break;
4851	    }
4852	}
4853      else
4854	output_operand_lossage ("invalid operand to %%U code");
4855      return;
4856    case 'V' :
4857      /* Output cache bypass indicator for a load/store insn.  Volatile memory
4858	 refs are defined to use the cache bypass mechanism.  */
4859      if (GET_CODE (x) == MEM)
4860	{
4861	  if ((MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET)
4862	      || arc_is_uncached_mem_p (x))
4863	    fputs (".di", file);
4864	}
4865      else
4866	output_operand_lossage ("invalid operand to %%V code");
4867      return;
4868      /* plt code.  */
4869    case 'P':
4870    case 0 :
4871      /* Do nothing special.  */
4872      break;
4873    case 'F':
4874      fputs (reg_names[REGNO (x)]+1, file);
4875      return;
4876    case '^':
      /* This punctuation character is needed because label references are
	 printed in the output template using %l.  This is a front end
	 character, and when we want to emit a '@' before it, we have to use
	 this '^'.  */
      fputc ('@', file);
      return;
4884    case 'O':
4885      /* Output an operator.  */
4886      switch (GET_CODE (x))
4887	{
4888	case PLUS:	fputs ("add", file); return;
4889	case SS_PLUS:	fputs ("adds", file); return;
4890	case AND:	fputs ("and", file); return;
4891	case IOR:	fputs ("or", file); return;
4892	case XOR:	fputs ("xor", file); return;
4893	case MINUS:	fputs ("sub", file); return;
4894	case SS_MINUS:	fputs ("subs", file); return;
4895	case ASHIFT:	fputs ("asl", file); return;
4896	case ASHIFTRT:	fputs ("asr", file); return;
4897	case LSHIFTRT:	fputs ("lsr", file); return;
4898	case ROTATERT:	fputs ("ror", file); return;
4899	case MULT:	fputs ("mpy", file); return;
4900	case ABS:	fputs ("abs", file); return; /* Unconditional.  */
4901	case NEG:	fputs ("neg", file); return;
4902	case SS_NEG:	fputs ("negs", file); return;
4903	case NOT:	fputs ("not", file); return; /* Unconditional.  */
4904	case ZERO_EXTEND:
4905	  fputs ("ext", file); /* bmsk allows predication.  */
4906	  goto size_suffix;
4907	case SIGN_EXTEND: /* Unconditional.  */
4908	  fputs ("sex", file);
4909	size_suffix:
4910	  switch (GET_MODE (XEXP (x, 0)))
4911	    {
4912	    case E_QImode: fputs ("b", file); return;
4913	    case E_HImode: fputs ("w", file); return;
4914	    default: break;
4915	    }
4916	  break;
4917	case SS_TRUNCATE:
4918	  if (GET_MODE (x) != HImode)
4919	    break;
	  fputs ("sat16", file);
	  return;
4921	default: break;
4922	}
4923      output_operand_lossage ("invalid operand to %%O code"); return;
4924    case 'o':
4925      if (GET_CODE (x) == SYMBOL_REF)
4926	{
4927	  assemble_name (file, XSTR (x, 0));
4928	  return;
4929	}
4930      break;
4931    case '&':
4932      if (TARGET_ANNOTATE_ALIGN)
4933	fprintf (file, "; unalign: %d", cfun->machine->unalign);
4934      return;
4935    case '+':
4936      if (TARGET_V2)
4937	fputs ("m", file);
4938      else
4939	fputs ("h", file);
4940      return;
4941    case '_':
4942      if (TARGET_V2)
4943	fputs ("h", file);
4944      else
4945	fputs ("w", file);
4946      return;
4947    default :
4948      /* Unknown flag.  */
4949      output_operand_lossage ("invalid operand output code");
4950    }
4951
4952  switch (GET_CODE (x))
4953    {
4954    case REG :
4955      fputs (reg_names[REGNO (x)], file);
4956      break;
4957    case MEM :
4958      {
4959	rtx addr = XEXP (x, 0);
4960	int size = GET_MODE_SIZE (GET_MODE (x));
4961
4962	if (legitimate_small_data_address_p (addr, GET_MODE (x)))
4963	  output_sdata = 1;
4964
4965	fputc ('[', file);
4966
4967	switch (GET_CODE (addr))
4968	  {
4969	  case PRE_INC: case POST_INC:
4970	    output_address (VOIDmode,
4971			    plus_constant (Pmode, XEXP (addr, 0), size)); break;
4972	  case PRE_DEC: case POST_DEC:
4973	    output_address (VOIDmode,
4974			    plus_constant (Pmode, XEXP (addr, 0), -size));
4975	    break;
4976	  case PRE_MODIFY: case POST_MODIFY:
4977	    output_address (VOIDmode, XEXP (addr, 1)); break;
4978	  case PLUS:
4979	    if (output_scaled)
4980	      {
4981		output_address (VOIDmode,
4982				plus_constant (Pmode, XEXP (addr, 0),
4983					       (INTVAL (XEXP (addr, 1))
4984						>> (size == 2 ? 1 : 2))));
4985		output_scaled = 0;
4986	      }
4987	    else
4988	      output_address (VOIDmode, addr);
4989	    break;
4990	  default:
4991	    if (flag_pic && CONSTANT_ADDRESS_P (addr))
4992	      arc_output_pic_addr_const (file, addr, code);
4993	    else
4994	      output_address (VOIDmode, addr);
4995	    break;
4996	  }
4997	fputc (']', file);
4998	break;
4999      }
5000    case CONST_DOUBLE :
5001      /* We handle SFmode constants here as output_addr_const doesn't.  */
5002      if (GET_MODE (x) == SFmode)
5003	{
5004	  long l;
5005
5006	  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
5007	  fprintf (file, "0x%08lx", l);
5008	  break;
5009	}
5010      /* FALLTHRU */
5011      /* Let output_addr_const deal with it.  */
5012    default :
5013      if (flag_pic
5014	  || (GET_CODE (x) == CONST
5015	      && GET_CODE (XEXP (x, 0)) == UNSPEC
5016	      && (XINT (XEXP (x, 0), 1) == UNSPEC_TLS_OFF
5017		  || XINT (XEXP (x, 0), 1) == UNSPEC_TLS_GD))
5018	  || (GET_CODE (x) == CONST
5019	      && GET_CODE (XEXP (x, 0)) == PLUS
5020	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
5021	      && (XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_OFF
5022		  || XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_GD)))
5023	arc_output_pic_addr_const (file, x, code);
5024      else
5025	output_addr_const (file, x);
5026      break;
5027    }
5028}
5029
5030/* Print a memory address as an operand to reference that memory location.  */
5031
5032void
5033arc_print_operand_address (FILE *file , rtx addr)
5034{
5035  register rtx base, index = 0;
5036
5037  switch (GET_CODE (addr))
5038    {
5039    case REG :
5040      fputs (reg_names[REGNO (addr)], file);
5041      break;
5042    case SYMBOL_REF:
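      /* Small-data references are emitted gp-relative, i.e. as
	 "gp,symbol@sda".  */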
5043      if (output_sdata)
5044	fputs ("gp,", file);
5045      output_addr_const (file, addr);
5046      if (output_sdata)
5047	fputs ("@sda", file);
5048      output_sdata = 0;
5049      break;
5050    case PLUS :
5051      if (GET_CODE (XEXP (addr, 0)) == MULT)
5052	index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
5053      else if (CONST_INT_P (XEXP (addr, 0)))
5054	index = XEXP (addr, 0), base = XEXP (addr, 1);
5055      else
5056	base = XEXP (addr, 0), index = XEXP (addr, 1);
5057
5058      gcc_assert (OBJECT_P (base));
5059      arc_print_operand_address (file, base);
5060      if (CONSTANT_P (base) && CONST_INT_P (index))
5061	fputc ('+', file);
5062      else
5063	fputc (',', file);
5064      gcc_assert (OBJECT_P (index));
5065      arc_print_operand_address (file, index);
5066      break;
5067    case CONST:
5068      {
5069	rtx c = XEXP (addr, 0);
5070
5071	if ((GET_CODE (c) == UNSPEC
5072	     && (XINT (c, 1) == UNSPEC_TLS_OFF
5073		 || XINT (c, 1) == UNSPEC_TLS_IE))
5074	    || (GET_CODE (c) == PLUS
5075		&& GET_CODE (XEXP (c, 0)) == UNSPEC
5076		&& (XINT (XEXP (c, 0), 1) == UNSPEC_TLS_OFF
5077		    || XINT (XEXP (c, 0), 1) == ARC_UNSPEC_GOTOFFPC)))
5078	  {
5079	    arc_output_pic_addr_const (file, c, 0);
5080	    break;
5081	  }
5082	gcc_assert (GET_CODE (c) == PLUS);
5083	gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
5084	gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
5085
5086	output_address (VOIDmode, XEXP (addr, 0));
5087
5088	break;
5089      }
5090    case PRE_INC :
5091    case PRE_DEC :
      /* We shouldn't get here as we've lost the mode of the memory object
	 (which says how much to inc/dec by).  */
5094      gcc_unreachable ();
5095      break;
5096    default :
5097      if (flag_pic)
5098	arc_output_pic_addr_const (file, addr, 0);
5099      else
5100	output_addr_const (file, addr);
5101      break;
5102    }
5103}
5104
5105/* Conditional execution support.
5106
5107   This is based on the ARM port but for now is much simpler.
5108
5109   A finite state machine takes care of noticing whether or not instructions
5110   can be conditionally executed, and thus decrease execution time and code
5111   size by deleting branch instructions.  The fsm is controlled by
5112   arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
5113   actions of PRINT_OPERAND.  The patterns in the .md file for the branch
5114   insns also have a hand in this.  */
/* The way we leave dealing with non-annulled or annul-false delay slot
   insns to the consumer is awkward.  */
5117
5118/* The states of the fsm controlling condition codes are:
5119   0: normal, do nothing special
5120   1: don't output this insn
5121   2: don't output this insn
5122   3: make insns conditional
5123   4: make insns conditional
5124   5: make insn conditional (only for outputting annulled delay slot insns)
5125
5126   special value for cfun->machine->uid_ccfsm_state:
5127   6: return with but one insn before it since function start / call
5128
5129   State transitions (state->state by whom, under what condition):
5130   0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
5131          some instructions.
5132   0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
5133          by zero or more non-jump insns and an unconditional branch with
5134	  the same target label as the condbranch.
5135   1 -> 3 branch patterns, after having not output the conditional branch
5136   2 -> 4 branch patterns, after having not output the conditional branch
5137   0 -> 5 branch patterns, for annulled delay slot insn.
5138   3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
5139          (the target label has CODE_LABEL_NUMBER equal to
5140	  arc_ccfsm_target_label).
5141   4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
5142   3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
5143   5 -> 0 when outputting the delay slot insn
5144
5145   If the jump clobbers the conditions then we use states 2 and 4.
5146
5147   A similar thing can be done with conditional return insns.
5148
5149   We also handle separating branches from sets of the condition code.
5150   This is done here because knowledge of the ccfsm state is required, as
5151   we may not be outputting the branch.  */
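/* As a purely illustrative example (register numbers and the label are
   hypothetical), a sequence along the lines of

	cmp	r0,0
	bne	.L1
	mov	r1,1
     .L1:

   can end up being emitted as

	cmp	r0,0
	mov.eq	r1,1

   i.e. the conditional branch is deleted and the insn it skipped is
   predicated on the inverse of the branch condition.  */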
5152
5153/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
5154   before letting final output INSN.  */
5155
5156static void
5157arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
5158{
5159  /* BODY will hold the body of INSN.  */
5160  register rtx body;
5161
5162  /* This will be 1 if trying to repeat the trick (ie: do the `else' part of
5163     an if/then/else), and things need to be reversed.  */
5164  int reverse = 0;
5165
5166  /* If we start with a return insn, we only succeed if we find another one.  */
5167  int seeking_return = 0;
5168
5169  /* START_INSN will hold the insn from where we start looking.  This is the
5170     first insn after the following code_label if REVERSE is true.  */
5171  rtx_insn *start_insn = insn;
5172
5173  /* Type of the jump_insn. Brcc insns don't affect ccfsm changes,
5174     since they don't rely on a cmp preceding them.  */
5175  enum attr_type jump_insn_type;
5176
5177  /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
5178     We can't do this in macro FINAL_PRESCAN_INSN because it's called from
5179     final_scan_insn which has `optimize' as a local.  */
5180  if (optimize < 2 || TARGET_NO_COND_EXEC)
5181    return;
5182
5183  /* Ignore notes and labels.  */
5184  if (!INSN_P (insn))
5185    return;
5186  body = PATTERN (insn);
5187  /* If in state 4, check if the target branch is reached, in order to
5188     change back to state 0.  */
5189  if (state->state == 4)
5190    {
5191      if (insn == state->target_insn)
5192	{
5193	  state->target_insn = NULL;
5194	  state->state = 0;
5195	}
5196      return;
5197    }
5198
5199  /* If in state 3, it is possible to repeat the trick, if this insn is an
5200     unconditional branch to a label, and immediately following this branch
5201     is the previous target label which is only used once, and the label this
5202     branch jumps to is not too far off.  Or in other words "we've done the
5203     `then' part, see if we can do the `else' part."  */
5204  if (state->state == 3)
5205    {
5206      if (simplejump_p (insn))
5207	{
5208	  start_insn = next_nonnote_insn (start_insn);
5209	  if (GET_CODE (start_insn) == BARRIER)
5210	    {
5211	      /* ??? Isn't this always a barrier?  */
5212	      start_insn = next_nonnote_insn (start_insn);
5213	    }
5214	  if (GET_CODE (start_insn) == CODE_LABEL
5215	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
5216	      && LABEL_NUSES (start_insn) == 1)
5217	    reverse = TRUE;
5218	  else
5219	    return;
5220	}
5221      else if (GET_CODE (body) == SIMPLE_RETURN)
5222	{
5223	  start_insn = next_nonnote_insn (start_insn);
5224	  if (GET_CODE (start_insn) == BARRIER)
5225	    start_insn = next_nonnote_insn (start_insn);
5226	  if (GET_CODE (start_insn) == CODE_LABEL
5227	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
5228	      && LABEL_NUSES (start_insn) == 1)
5229	    {
5230	      reverse = TRUE;
5231	      seeking_return = 1;
5232	    }
5233	  else
5234	    return;
5235	}
5236      else
5237	return;
5238    }
5239
5240  if (GET_CODE (insn) != JUMP_INSN
5241      || GET_CODE (PATTERN (insn)) == ADDR_VEC
5242      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
5243    return;
5244
5245 /* We can't predicate BRCC or loop ends.
5246    Also, when generating PIC code, and considering a medium range call,
5247    we can't predicate the call.  */
5248  jump_insn_type = get_attr_type (insn);
5249  if (jump_insn_type == TYPE_BRCC
5250      || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
5251      || jump_insn_type == TYPE_LOOP_END
5252      || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
5253    return;
5254
5255  /* This jump might be paralleled with a clobber of the condition codes;
5256     the jump should always come first.  */
5257  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5258    body = XVECEXP (body, 0, 0);
5259
5260  if (reverse
5261      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
5262	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
5263    {
5264      int insns_skipped = 0, fail = FALSE, succeed = FALSE;
5265      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
5266      int then_not_else = TRUE;
5267      /* Nonzero if next insn must be the target label.  */
5268      int next_must_be_target_label_p;
5269      rtx_insn *this_insn = start_insn;
5270      rtx label = 0;
5271
5272      /* Register the insn jumped to.  */
5273      if (reverse)
5274	{
5275	  if (!seeking_return)
5276	    label = XEXP (SET_SRC (body), 0);
5277	}
5278      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
5279	label = XEXP (XEXP (SET_SRC (body), 1), 0);
5280      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
5281	{
5282	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
5283	  then_not_else = FALSE;
5284	}
5285      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
5286	seeking_return = 1;
5287      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
5288	{
5289	  seeking_return = 1;
5290	  then_not_else = FALSE;
5291	}
5292      else
5293	gcc_unreachable ();
5294
5295      /* If this is a non-annulled branch with a delay slot, there is
5296	 no need to conditionalize the delay slot.  */
5297      if ((GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) == SEQUENCE)
5298	  && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
5299	{
5300	  this_insn = NEXT_INSN (this_insn);
5301	}
5302      /* See how many insns this branch skips, and what kind of insns.  If all
5303	 insns are okay, and the label or unconditional branch to the same
5304	 label is not too far away, succeed.  */
5305      for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
5306	   !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
5307	   insns_skipped++)
5308	{
5309	  rtx scanbody;
5310
5311	  this_insn = next_nonnote_insn (this_insn);
5312	  if (!this_insn)
5313	    break;
5314
5315	  if (next_must_be_target_label_p)
5316	    {
5317	      if (GET_CODE (this_insn) == BARRIER)
5318		continue;
5319	      if (GET_CODE (this_insn) == CODE_LABEL
5320		  && this_insn == label)
5321		{
5322		  state->state = 1;
5323		  succeed = TRUE;
5324		}
5325	      else
5326		fail = TRUE;
5327	      break;
5328	    }
5329
5330	  switch (GET_CODE (this_insn))
5331	    {
5332	    case CODE_LABEL:
5333	      /* Succeed if it is the target label, otherwise fail since
5334		 control falls in from somewhere else.  */
5335	      if (this_insn == label)
5336		{
5337		  state->state = 1;
5338		  succeed = TRUE;
5339		}
5340	      else
5341		fail = TRUE;
5342	      break;
5343
5344	    case BARRIER:
5345	      /* Succeed if the following insn is the target label.
5346		 Otherwise fail.
5347		 If return insns are used then the last insn in a function
5348		 will be a barrier.  */
5349	      next_must_be_target_label_p = TRUE;
5350	      break;
5351
5352	    case CALL_INSN:
5353	      /* Can handle a call insn if there are no insns after it.
5354		 IE: The next "insn" is the target label.  We don't have to
5355		 worry about delay slots as such insns are SEQUENCE's inside
5356		 INSN's.  ??? It is possible to handle such insns though.  */
5357	      if (get_attr_cond (this_insn) == COND_CANUSE)
5358		next_must_be_target_label_p = TRUE;
5359	      else
5360		fail = TRUE;
5361	      break;
5362
5363	    case JUMP_INSN:
5364	      scanbody = PATTERN (this_insn);
5365
5366	      /* If this is an unconditional branch to the same label, succeed.
5367		 If it is to another label, do nothing.  If it is conditional,
5368		 fail.  */
5369	      /* ??? Probably, the test for the SET and the PC are
5370		 unnecessary.  */
5371
5372	      if (GET_CODE (scanbody) == SET
5373		  && GET_CODE (SET_DEST (scanbody)) == PC)
5374		{
5375		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
5376		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
5377		    {
5378		      state->state = 2;
5379		      succeed = TRUE;
5380		    }
5381		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
5382		    fail = TRUE;
5383		  else if (get_attr_cond (this_insn) != COND_CANUSE)
5384		    fail = TRUE;
5385		}
5386	      else if (GET_CODE (scanbody) == SIMPLE_RETURN
5387		       && seeking_return)
5388		{
5389		  state->state = 2;
5390		  succeed = TRUE;
5391		}
5392	      else if (GET_CODE (scanbody) == PARALLEL)
5393		{
5394		  if (get_attr_cond (this_insn) != COND_CANUSE)
5395		    fail = TRUE;
5396		}
5397	      break;
5398
5399	    case INSN:
5400	      scanbody = PATTERN (this_insn);
5401
5402	      /* We can only do this with insns that can use the condition
5403		 codes (and don't set them).  */
5404	      if (GET_CODE (scanbody) == SET
5405		  || GET_CODE (scanbody) == PARALLEL)
5406		{
5407		  if (get_attr_cond (this_insn) != COND_CANUSE)
5408		    fail = TRUE;
5409		}
5410	      /* We can't handle other insns like sequences.  */
5411	      else
5412		fail = TRUE;
5413	      break;
5414
5415	    default:
5416	      break;
5417	    }
5418	}
5419
5420      if (succeed)
5421	{
5422	  if ((!seeking_return) && (state->state == 1 || reverse))
5423	    state->target_label = CODE_LABEL_NUMBER (label);
5424	  else if (seeking_return || state->state == 2)
5425	    {
5426	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
5427		{
5428		  this_insn = next_nonnote_insn (this_insn);
5429
5430		  gcc_assert (!this_insn ||
5431			      (GET_CODE (this_insn) != BARRIER
5432			       && GET_CODE (this_insn) != CODE_LABEL));
5433		}
5434	      if (!this_insn)
5435		{
5436		  /* Oh dear! we ran off the end, give up.  */
5437		  extract_insn_cached (insn);
5438		  state->state = 0;
5439		  state->target_insn = NULL;
5440		  return;
5441		}
5442	      state->target_insn = this_insn;
5443	    }
5444	  else
5445	    gcc_unreachable ();
5446
5447	  /* If REVERSE is true, the condition recorded in STATE->cc needs
5448	     to be inverted from what it was.  */
5449	  if (!reverse)
5450	    {
5451	      state->cond = XEXP (SET_SRC (body), 0);
5452	      state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
5453	    }
5454
5455	  if (reverse || then_not_else)
5456	    state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
5457	}
5458
5459      /* Restore recog_operand.  Getting the attributes of other insns can
5460	 destroy this array, but final.c assumes that it remains intact
5461	 across this call; since the insn has been recognized already we
5462	 call insn_extract directly.  */
5463      extract_insn_cached (insn);
5464    }
5465}
5466
5467/* Record that we are currently outputting label NUM with prefix PREFIX.
5468   If it's the label we're looking for, reset the ccfsm machinery.
5469
5470   Called from ASM_OUTPUT_INTERNAL_LABEL.  */
5471
5472static void
5473arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
5474{
5475  if (state->state == 3 && state->target_label == num
5476      && !strcmp (prefix, "L"))
5477    {
5478      state->state = 0;
5479      state->target_insn = NULL;
5480    }
5481}
5482
5483/* We are considering a conditional branch with the condition COND.
5484   Check if we want to conditionalize a delay slot insn, and if so modify
5485   the ccfsm state accordingly.
5486   REVERSE says the branch will branch when the condition is false.  */
5487void
5488arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
5489			    struct arc_ccfsm *state)
5490{
5491  rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
5492  if (!state)
5493    state = &arc_ccfsm_current;
5494
5495  gcc_assert (state->state == 0);
5496  if (seq_insn != jump)
5497    {
5498      rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
5499
5500      if (!as_a<rtx_insn *> (insn)->deleted ()
5501	  && INSN_ANNULLED_BRANCH_P (jump)
5502	  && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
5503	{
5504	  state->cond = cond;
5505	  state->cc = get_arc_condition_code (cond);
5506	  if (!reverse)
5507	    arc_ccfsm_current.cc
5508	      = ARC_INVERSE_CONDITION_CODE (state->cc);
5509	  rtx pat = PATTERN (insn);
5510	  if (GET_CODE (pat) == COND_EXEC)
5511	    gcc_assert ((INSN_FROM_TARGET_P (insn)
5512			 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
5513			== get_arc_condition_code (XEXP (pat, 0)));
5514	  else
5515	    state->state = 5;
5516	}
5517    }
5518}
5519
5520/* Update *STATE as we would when we emit INSN.  */
5521
5522static void
5523arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
5524{
5525  enum attr_type type;
5526
5527  if (LABEL_P (insn))
5528    arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
5529  else if (JUMP_P (insn)
5530	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
5531	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
5532	   && ((type = get_attr_type (insn)) == TYPE_BRANCH
5533	       || ((type == TYPE_UNCOND_BRANCH
5534		    || type == TYPE_RETURN)
5535		   && ARC_CCFSM_BRANCH_DELETED_P (state))))
5536    {
5537      if (ARC_CCFSM_BRANCH_DELETED_P (state))
5538	ARC_CCFSM_RECORD_BRANCH_DELETED (state);
5539      else
5540	{
5541	  rtx src = SET_SRC (PATTERN (insn));
5542	  arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
5543				      insn, state);
5544	}
5545    }
5546  else if (arc_ccfsm_current.state == 5)
5547    arc_ccfsm_current.state = 0;
5548}
5549
5550/* Return true if the current insn, which is a conditional branch, is to be
5551   deleted.  */
5552
5553bool
5554arc_ccfsm_branch_deleted_p (void)
5555{
5556  return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
5557}
5558
5559/* Record that a branch isn't output because subsequent insns can be
5560   conditionalized.  */
5561
5562void
5563arc_ccfsm_record_branch_deleted (void)
5564{
5565  ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
5566}
5567
5568/* During insn output, indicate if the current insn is predicated.  */
5569
5570bool
5571arc_ccfsm_cond_exec_p (void)
5572{
5573  return (cfun->machine->prescan_initialized
5574	  && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
5575}
5576
5577/* When deciding if an insn should be output short, we want to know something
5578   about the following insns:
5579   - if another insn follows which we know we can output as a short insn
5580     before an alignment-sensitive point, we can output this insn short:
5581     the decision about the eventual alignment can be postponed.
5582   - if a to-be-aligned label comes next, we should output this insn such
5583     as to get / preserve 4-byte alignment.
5584   - if a likely branch without delay slot insn, or a call with an immediately
5585     following short insn comes next, we should output this insn such as to
5586     get / preserve 2 mod 4 unalignment.
5587   - do the same for a not completely unlikely branch with a short insn
5588     following before any other branch / label.
5589   - in order to decide if we are actually looking at a branch, we need to
5590     call arc_ccfsm_advance.
5591   - in order to decide if we are looking at a short insn, we should know
5592     if it is conditionalized.  To a first order of approximation this is
5593     the case if the state from arc_ccfsm_advance from before this insn
5594     indicates the insn is conditionalized.  However, a further refinement
5595     could be to not conditionalize an insn if the destination register(s)
5596     is/are dead in the non-executed case.  */
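/* For instance (purely illustrative): if cfun->machine->unalign is 2
   and the next alignment-sensitive point is a label that wants 4-byte
   alignment, emitting the current insn in its 2-byte "_s" form gets us
   back to a 4-byte boundary without padding, whereas emitting the
   4-byte form would leave that label misaligned or require a nop.  */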
5597/* Return non-zero if INSN should be output as a short insn.  UNALIGN is
5598   zero if the current insn is aligned to a 4-byte boundary, two otherwise.
5599   If CHECK_ATTR is greater than 0, check the iscompact attribute first.  */
5600
5601static int
5602arc_verify_short (rtx_insn *insn, int, int check_attr)
5603{
5604  enum attr_iscompact iscompact;
5605
5606  if (check_attr > 0)
5607    {
5608      iscompact = get_attr_iscompact (insn);
5609      if (iscompact == ISCOMPACT_FALSE)
5610	return 0;
5611    }
5612
5613  return (get_attr_length (insn) & 2) != 0;
5614}
5615
5616/* When outputting an instruction (alternative) that can potentially be short,
5617   output the short suffix if the insn is in fact short, and update
5618   cfun->machine->unalign accordingly.  */
5619
5620static void
5621output_short_suffix (FILE *file)
5622{
5623  rtx_insn *insn = current_output_insn;
5624  if (!insn)
5625    return;
5626
5627  if (arc_verify_short (insn, cfun->machine->unalign, 1))
5628    {
5629      fprintf (file, "_s");
5630      cfun->machine->unalign ^= 2;
5631    }
5632  /* Restore recog_operand.  */
5633  extract_insn_cached (insn);
5634}
5635
5636/* Implement FINAL_PRESCAN_INSN.  */
5637
5638void
5639arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
5640			int noperands ATTRIBUTE_UNUSED)
5641{
5642  if (TARGET_DUMPISIZE)
5643    fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5644
5645  if (!cfun->machine->prescan_initialized)
5646    {
5647      /* Clear lingering state from branch shortening.  */
5648      memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
5649      cfun->machine->prescan_initialized = 1;
5650    }
5651  arc_ccfsm_advance (insn, &arc_ccfsm_current);
5652}
5653
5654/* Given FROM and TO register numbers, say whether this elimination is allowed.
5655   Frame pointer elimination is automatically handled.
5656
5657   All eliminations are permissible. If we need a frame
5658   pointer, we must eliminate ARG_POINTER_REGNUM into
5659   FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
5660
5661static bool
5662arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
5663{
5664  return ((to == HARD_FRAME_POINTER_REGNUM) || (to == STACK_POINTER_REGNUM));
5665}
5666
5667/* Define the offset between two registers, one to be eliminated, and
5668   the other its replacement, at the start of a routine.  */
5669
5670int
5671arc_initial_elimination_offset (int from, int to)
5672{
5673  if (!cfun->machine->frame_info.initialized)
5674    arc_compute_frame_size ();
5675
5676  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5677    {
5678      return (cfun->machine->frame_info.extra_size
5679	      + cfun->machine->frame_info.reg_size);
5680    }
5681
5682  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5683    {
5684      return (cfun->machine->frame_info.total_size
5685	      - cfun->machine->frame_info.pretend_size);
5686    }
5687
5688  if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
5689    {
5690      return (cfun->machine->frame_info.total_size
5691	      - (cfun->machine->frame_info.pretend_size
5692	      + cfun->machine->frame_info.extra_size
5693	      + cfun->machine->frame_info.reg_size));
5694    }
5695  if ((from == FRAME_POINTER_REGNUM) && (to == HARD_FRAME_POINTER_REGNUM))
5696    return 0;
5697
5698  gcc_unreachable ();
5699}
5700
5701static bool
5702arc_frame_pointer_required (void)
5703{
5704  return cfun->calls_alloca || crtl->calls_eh_return;
5705}
5706
5707
5708/* Return the destination address of a branch.  */
5709
5710static int
5711branch_dest (rtx branch)
5712{
5713  rtx pat = PATTERN (branch);
5714  rtx dest = (GET_CODE (pat) == PARALLEL
5715	      ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
5716  int dest_uid;
5717
5718  if (GET_CODE (dest) == IF_THEN_ELSE)
5719    dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
5720
5721  dest = XEXP (dest, 0);
5722  dest_uid = INSN_UID (dest);
5723
5724  return INSN_ADDRESSES (dest_uid);
5725}
5726
5727
5728/* Implement TARGET_ENCODE_SECTION_INFO hook.  */
5729
5730static void
5731arc_encode_section_info (tree decl, rtx rtl, int first)
5732{
5733  /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
5734     This clears machine specific flags, so has to come first.  */
5735  default_encode_section_info (decl, rtl, first);
5736
5737  /* Check if it is a function, and whether it has the
5738     [long/medium/short]_call attribute specified.  */
5739  if (TREE_CODE (decl) == FUNCTION_DECL)
5740    {
5741      rtx symbol = XEXP (rtl, 0);
5742      int flags = SYMBOL_REF_FLAGS (symbol);
5743
5744      tree attr = (TREE_TYPE (decl) != error_mark_node
5745		   ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
5746      tree long_call_attr = lookup_attribute ("long_call", attr);
5747      tree medium_call_attr = lookup_attribute ("medium_call", attr);
5748      tree short_call_attr = lookup_attribute ("short_call", attr);
5749
5750      if (long_call_attr != NULL_TREE)
5751	flags |= SYMBOL_FLAG_LONG_CALL;
5752      else if (medium_call_attr != NULL_TREE)
5753	flags |= SYMBOL_FLAG_MEDIUM_CALL;
5754      else if (short_call_attr != NULL_TREE)
5755	flags |= SYMBOL_FLAG_SHORT_CALL;
5756
5757      SYMBOL_REF_FLAGS (symbol) = flags;
5758    }
5759  else if (TREE_CODE (decl) == VAR_DECL)
5760    {
5761      rtx symbol = XEXP (rtl, 0);
5762
5763      tree attr = (TREE_TYPE (decl) != error_mark_node
5764		   ? DECL_ATTRIBUTES (decl) : NULL_TREE);
5765
5766      tree sec_attr = lookup_attribute ("section", attr);
5767      if (sec_attr)
5768	{
5769	  const char *sec_name
5770	    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (sec_attr)));
5771	  if (strcmp (sec_name, ".cmem") == 0
5772	      || strcmp (sec_name, ".cmem_shared") == 0
5773	      || strcmp (sec_name, ".cmem_private") == 0)
5774	    SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_CMEM;
5775	}
5776    }
5777}
5778
5779/* This is how to output a definition of an internal numbered label where
5780   PREFIX is the class of label and NUM is the number within the class.  */
5781
5782static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
5783{
5784  if (cfun)
5785    arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
5786  default_internal_label (stream, prefix, labelno);
5787}
5788
5789/* Set the cpu type and print out other fancy things,
5790   at the top of the file.  */
5791
5792static void arc_file_start (void)
5793{
5794  default_file_start ();
5795  fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
5796
5797  /* Set the build attributes we want to have.  */
5798  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_PCS_config, %d\n",
5799	       ATTRIBUTE_PCS);
5800  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_rf16, %d\n",
5801	       TARGET_RF16 ? 1 : 0);
5802  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_pic, %d\n",
5803	       flag_pic ? 2 : 0);
5804  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_tls, %d\n",
5805	       (arc_tp_regno != -1) ? 1 : 0);
5806  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_sda, %d\n",
5807	       TARGET_NO_SDATA_SET ? 0 : 2);
5808  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_exceptions, %d\n",
5809	       TARGET_OPTFPE ? 1 : 0);
5810  if (TARGET_V2)
5811    asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_CPU_variation, %d\n",
5812		 (arc_tune < ARC_TUNE_CORE_3) ? 2 :
5813		 (arc_tune == ARC_TUNE_CORE_3 ? 3 : 4));
5814}
5815
5816/* Implement `TARGET_ASM_FILE_END'.  */
5817/* Output the JLI table entries to the assembler output file.  */
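/* For a hypothetical function `foo', the loop below emits roughly:

	# JLI entry for function foo
		.section .jlitab, "axG", @progbits, .jlitab.foo, comdat
		.align	4
	__jli.foo:
		.weak __jli.foo
		b	@foo

   (illustrative only; the exact text comes from the fprintf calls
   below).  */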
5818
5819void arc_file_end (void)
5820{
5821  arc_jli_section *sec = arc_jli_sections;
5822
5823  while (sec != NULL)
5824    {
5825      fprintf (asm_out_file, "\n");
5826      fprintf (asm_out_file, "# JLI entry for function ");
5827      assemble_name (asm_out_file, sec->name);
5828      fprintf (asm_out_file, "\n\t.section .jlitab, \"axG\", @progbits, "
5829	       ".jlitab.");
5830      assemble_name (asm_out_file, sec->name);
5831      fprintf (asm_out_file,", comdat\n");
5832
5833      fprintf (asm_out_file, "\t.align\t4\n");
5834      fprintf (asm_out_file, "__jli.");
5835      assemble_name (asm_out_file, sec->name);
5836      fprintf (asm_out_file, ":\n\t.weak __jli.");
5837      assemble_name (asm_out_file, sec->name);
5838      fprintf (asm_out_file, "\n\tb\t@");
5839      assemble_name (asm_out_file, sec->name);
5840      fprintf (asm_out_file, "\n");
5841      sec = sec->next;
5842    }
5843  file_end_indicate_exec_stack ();
5844}
5845
5846/* Cost functions.  */
5847
5848/* Compute a (partial) cost for rtx X.  Return true if the complete
5849   cost has been computed, and false if subexpressions should be
5850   scanned.  In either case, *TOTAL contains the cost result.  */
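/* For example, a CONST_INT that fits in an unsigned 6-bit immediate is
   costed as 0, i.e. as cheap as a register, whereas one that needs a
   long immediate falls through to the generic constant cost below (one
   insn when optimizing for speed, more when optimizing for size).  */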
5851
5852static bool
5853arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
5854	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
5855{
5856  int code = GET_CODE (x);
5857
5858  switch (code)
5859    {
5860      /* Small integers are as cheap as registers.  */
5861    case CONST_INT:
5862      {
5863	bool nolimm = false; /* Can we do without long immediate?  */
5864
5866	if (UNSIGNED_INT6 (INTVAL (x)))
5867	  nolimm = true;
5868	else
5869	  {
5870	    switch (outer_code)
5871	      {
5872	      case AND: /* bclr, bmsk, ext[bw] */
5873		if (satisfies_constraint_Ccp (x) /* bclr */
5874		    || satisfies_constraint_C1p (x) /* bmsk */)
5875		  nolimm = true;
5876		break;
5877	      case IOR: /* bset */
5878		if (satisfies_constraint_C0p (x)) /* bset */
5879		  nolimm = true;
5880		break;
5881	      case XOR:
5882		if (satisfies_constraint_C0p (x)) /* bxor */
5883		  nolimm = true;
5884		break;
5885	      case SET:
5886		if (UNSIGNED_INT8 (INTVAL (x)))
5887		  nolimm = true;
5888		if (satisfies_constraint_Chi (x))
5889		  nolimm = true;
5890		if (satisfies_constraint_Clo (x))
5891		  nolimm = true;
5892		break;
5893	      case MULT:
5894		if (TARGET_MUL64_SET)
5895		  if (SIGNED_INT12 (INTVAL (x)))
5896		    nolimm = true;
5897		break;
5898	      default:
5899		break;
5900	      }
5901	  }
5902	if (nolimm)
5903	  {
5904	    *total = 0;
5905	    return true;
5906	  }
5907      }
5908      /* FALLTHRU */
5909
5910      /*  4 byte values can be fetched as immediate constants -
5911	  let's give that the cost of an extra insn.  */
5912    case CONST:
5913    case LABEL_REF:
5914    case SYMBOL_REF:
5915      *total = speed ? COSTS_N_INSNS (1) : COSTS_N_INSNS (4);
5916      return true;
5917
5918    case CONST_DOUBLE:
5919      {
5920	rtx first, second;
5921
5922	if (TARGET_DPFP)
5923	  {
5924	    *total = COSTS_N_INSNS (1);
5925	    return true;
5926	  }
5927	split_double (x, &first, &second);
5928	*total = COSTS_N_INSNS (!SMALL_INT (INTVAL (first))
5929				+ !SMALL_INT (INTVAL (second)));
5930	return true;
5931      }
5932
5933    /* Encourage synth_mult to find a synthetic multiply when reasonable.
5934       If we need more than 12 insns to do a multiply, then go out-of-line,
5935       since the call overhead will be < 10% of the cost of the multiply.  */
5936    case ASHIFT:
5937    case ASHIFTRT:
5938    case LSHIFTRT:
5939      if (TARGET_BARREL_SHIFTER)
5940	{
5941	  if (CONSTANT_P (XEXP (x, 0)))
5942	    {
5943	      *total += rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code,
5944				  0, speed);
5945	      return true;
5946	    }
5947	  *total = COSTS_N_INSNS (1);
5948	}
5949      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5950	*total = COSTS_N_INSNS (16);
5951      else
5952	{
5953	  *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
5954	  /* ??? want_to_gcse_p can throw negative shift counts at us,
5955	     and then panics when it gets a negative cost as result.
5956	     Seen for gcc.c-torture/compile/20020710-1.c -Os .  */
5957	  if (*total < 0)
5958	    *total = 0;
5959	}
5960      return false;
5961
5962    case DIV:
5963    case UDIV:
5964      if (GET_MODE_CLASS (mode) == MODE_FLOAT
5965	  && (TARGET_FP_SP_SQRT || TARGET_FP_DP_SQRT))
5966	*total = COSTS_N_INSNS (1);
5967      else if (GET_MODE_CLASS (mode) == MODE_INT
5968	       && TARGET_DIVREM)
5969	*total = COSTS_N_INSNS (1);
5970      else if (speed)
5971	*total = COSTS_N_INSNS (30);
5972      else
5973	*total = COSTS_N_INSNS (1);
5974      return false;
5975
5976    case MULT:
5977      if ((TARGET_DPFP && GET_MODE (x) == DFmode))
5978	*total = COSTS_N_INSNS (1);
5979      else if (speed)
5980	*total = arc_multcost;
5981      /* We do not want synth_mult sequences when optimizing
5982	 for size.  */
5983      else if (TARGET_ANY_MPY)
5984	*total = COSTS_N_INSNS (1);
5985      else
5986	*total = COSTS_N_INSNS (2);
5987      return false;
5988
5989    case PLUS:
5990      if (outer_code == MEM && CONST_INT_P (XEXP (x, 1))
5991	  && RTX_OK_FOR_OFFSET_P (mode, XEXP (x, 1)))
5992	{
5993	  *total = 0;
5994	  return true;
5995	}
5996
5997      if ((GET_CODE (XEXP (x, 0)) == ASHIFT
5998	   && _1_2_3_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
5999          || (GET_CODE (XEXP (x, 0)) == MULT
6000              && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)))
6001	{
6002	  if (CONSTANT_P (XEXP (x, 1)) && !speed)
6003	    *total += COSTS_N_INSNS (4);
6004	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, PLUS, 1, speed);
6005	  return true;
6006	}
6007      return false;
6008    case MINUS:
6009      if ((GET_CODE (XEXP (x, 1)) == ASHIFT
6010	   && _1_2_3_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
6011          || (GET_CODE (XEXP (x, 1)) == MULT
6012              && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)))
6013	{
6014	  if (CONSTANT_P (XEXP (x, 0)) && !speed)
6015	    *total += COSTS_N_INSNS (4);
6016	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, PLUS, 1, speed);
6017	  return true;
6018	}
6019      return false;
6020
6021    case COMPARE:
6022      {
6023	rtx op0 = XEXP (x, 0);
6024	rtx op1 = XEXP (x, 1);
6025
6026	if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
6027	    && XEXP (op0, 1) == const1_rtx)
6028	  {
6029	    /* btst / bbit0 / bbit1:
6030	       Small integers and registers are free; everything else can
6031	       be put in a register.  */
6032	    mode = GET_MODE (XEXP (op0, 0));
6033	    *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
6034		      + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
6035	    return true;
6036	  }
6037	if (GET_CODE (op0) == AND && op1 == const0_rtx
6038	    && satisfies_constraint_C1p (XEXP (op0, 1)))
6039	  {
6040	    /* bmsk.f */
6041	    *total = rtx_cost (XEXP (op0, 0), VOIDmode, SET, 1, speed);
6042	    return true;
6043	  }
6044	/* add.f  */
6045	if (GET_CODE (op1) == NEG)
6046	  {
6047	    /* op0 might be constant, but the inside of op1 is rather
6048	       unlikely to be, so swapping the operands might lower
6049	       the cost.  */
6050	    mode = GET_MODE (op0);
6051	    *total = (rtx_cost (op0, mode, PLUS, 1, speed)
6052		      + rtx_cost (XEXP (op1, 0), mode, PLUS, 0, speed));
6053	  }
6054	return false;
6055      }
6056    case EQ: case NE:
6057      if (outer_code == IF_THEN_ELSE
6058	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
6059	  && XEXP (x, 1) == const0_rtx
6060	  && XEXP (XEXP (x, 0), 1) == const1_rtx)
6061	{
6062	  /* btst / bbit0 / bbit1:
6063	     Small integers and registers are free; everything else can
6064	     be put in a register.  */
6065	  rtx op0 = XEXP (x, 0);
6066
6067	  mode = GET_MODE (XEXP (op0, 0));
6068	  *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
6069		    + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
6070	  return true;
6071	}
6072      /* Fall through.  */
6073    /* scc_insn expands into two insns.  */
6074    case GTU: case GEU: case LEU:
6075      if (mode == SImode)
6076	*total += COSTS_N_INSNS (1);
6077      return false;
6078    case LTU: /* might use adc.  */
6079      if (mode == SImode)
6080	*total += COSTS_N_INSNS (1) - 1;
6081      return false;
6082    default:
6083      return false;
6084    }
6085}
6086
6087/* Return true if ADDR is a valid pic address.
6088   A valid pic address on arc should look like
6089   const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT))  */
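/* E.g. both of the following (with a hypothetical symbol `foo') are
   accepted, written schematically:

     (const (unspec [(symbol_ref "foo")] ARC_UNSPEC_GOTOFFPC))
     (const (plus (unspec [(symbol_ref "foo")] ARC_UNSPEC_GOT)
		  (const_int 4)))  */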
6090
6091bool
6092arc_legitimate_pic_addr_p (rtx addr)
6093{
6094  if (GET_CODE (addr) != CONST)
6095    return false;
6096
6097  addr = XEXP (addr, 0);
6098
6099
6100  if (GET_CODE (addr) == PLUS)
6101    {
6102      if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
6103	return false;
6104      addr = XEXP (addr, 0);
6105    }
6106
6107  if (GET_CODE (addr) != UNSPEC
6108      || XVECLEN (addr, 0) != 1)
6109    return false;
6110
6111  /* Must be one of @GOT, @GOTOFF, @GOTOFFPC, @tlsgd, @tlsie.  */
6112  if (XINT (addr, 1) != ARC_UNSPEC_GOT
6113      && XINT (addr, 1) != ARC_UNSPEC_GOTOFF
6114      && XINT (addr, 1) != ARC_UNSPEC_GOTOFFPC
6115      && XINT (addr, 1) != UNSPEC_TLS_GD
6116      && XINT (addr, 1) != UNSPEC_TLS_IE)
6117    return false;
6118
6119  if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
6120      && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
6121    return false;
6122
6123  return true;
6124}
6125
6126
6127
6128/* Return true if OP contains a symbol reference.  */
6129
6130static bool
6131symbolic_reference_mentioned_p (rtx op)
6132{
6133  register const char *fmt;
6134  register int i;
6135
6136  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
6137    return true;
6138
6139  fmt = GET_RTX_FORMAT (GET_CODE (op));
6140  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6141    {
6142      if (fmt[i] == 'E')
6143	{
6144	  register int j;
6145
6146	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6147	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6148	      return true;
6149	}
6150
6151      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6152	return true;
6153    }
6154
6155  return false;
6156}
6157
6158/* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
6159   If SKIP_LOCAL is true, skip symbols that bind locally.
6160   This is used further down in this file, and, without SKIP_LOCAL,
6161   in the addsi3 / subsi3 expanders when generating PIC code.  */
6162
6163bool
6164arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
6165{
6166  register const char *fmt;
6167  register int i;
6168
6169  if (GET_CODE (op) == UNSPEC)
6170    return false;
6171
6172  if (GET_CODE (op) == SYMBOL_REF)
6173    {
6174      if (SYMBOL_REF_TLS_MODEL (op))
6175	return true;
6176      if (!flag_pic)
6177	return false;
6178      tree decl = SYMBOL_REF_DECL (op);
6179      return !skip_local || !decl || !default_binds_local_p (decl);
6180    }
6181
6182  fmt = GET_RTX_FORMAT (GET_CODE (op));
6183  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6184    {
6185      if (fmt[i] == 'E')
6186	{
6187	  register int j;
6188
6189	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6190	    if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
6191							skip_local))
6192	      return true;
6193	}
6194
6195      else if (fmt[i] == 'e'
6196	       && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
6197							  skip_local))
6198	return true;
6199    }
6200
6201  return false;
6202}
6203
6204/* The __tls_get_addr symbol.  */
6205static GTY(()) rtx arc_tls_symbol;
6206
6207/* Emit a call to __tls_get_addr.  TI is the argument to this function.
6208   Return an RTX for the location that holds the call's return
6209   value.  */
6210
6211static rtx
6212arc_call_tls_get_addr (rtx ti)
6213{
6214  rtx arg = gen_rtx_REG (Pmode, R0_REG);
6215  rtx ret = gen_rtx_REG (Pmode, R0_REG);
6216  rtx fn;
6217  rtx_insn *insn;
6218
6219  if (!arc_tls_symbol)
6220    arc_tls_symbol = init_one_libfunc ("__tls_get_addr");
6221
6222  emit_move_insn (arg, ti);
6223  fn = gen_rtx_MEM (SImode, arc_tls_symbol);
6224  insn = emit_call_insn (gen_call_value (ret, fn, const0_rtx));
6225  RTL_CONST_CALL_P (insn) = 1;
6226  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), ret);
6227  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), arg);
6228
6229  return ret;
6230}
6231
6232#define DTPOFF_ZERO_SYM ".tdata"
6233
6234/* Return a legitimized address for ADDR,
6235   which is a SYMBOL_REF with tls_model MODEL.  */
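/* Roughly, for a hypothetical symbol `x' (illustrative only):
   - global dynamic: load x@tlsgd pc-relative into r0 and call
     __tls_get_addr;
   - local dynamic: do a global-dynamic style call for the module base
     (DTPOFF_ZERO_SYM) and add an x@dtpoff offset;
   - initial exec: load the offset from the GOT via x@tlsie and add the
     thread pointer register;
   - local exec: add x@tpoff directly to the thread pointer register.  */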
6236
6237static rtx
6238arc_legitimize_tls_address (rtx addr, enum tls_model model)
6239{
6240  rtx tmp;
6241
6242  if (!flag_pic && model == TLS_MODEL_LOCAL_DYNAMIC)
6243    model = TLS_MODEL_LOCAL_EXEC;
6244
6245
6246  /* The TP pointer needs to be set.  */
6247  gcc_assert (arc_tp_regno != -1);
6248
6249  switch (model)
6250    {
6251    case TLS_MODEL_GLOBAL_DYNAMIC:
6252      tmp = gen_reg_rtx (Pmode);
6253      emit_move_insn (tmp, arc_unspec_offset (addr, UNSPEC_TLS_GD));
6254      return arc_call_tls_get_addr (tmp);
6255
6256    case TLS_MODEL_LOCAL_DYNAMIC:
6257      rtx base;
6258      tree decl;
6259      const char *base_name;
6260
6261      decl = SYMBOL_REF_DECL (addr);
6262      base_name = DTPOFF_ZERO_SYM;
6263      if (decl && bss_initializer_p (decl))
6264	base_name = ".tbss";
6265
6266      base = gen_rtx_SYMBOL_REF (Pmode, base_name);
6267      tmp = gen_reg_rtx (Pmode);
6268      emit_move_insn (tmp, arc_unspec_offset (base, UNSPEC_TLS_GD));
6269      base = arc_call_tls_get_addr (tmp);
6270      return gen_rtx_PLUS (Pmode, force_reg (Pmode, base),
6271			   arc_unspec_offset (addr, UNSPEC_TLS_OFF));
6272
6273    case TLS_MODEL_INITIAL_EXEC:
6274      addr = arc_unspec_offset (addr, UNSPEC_TLS_IE);
6275      addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr));
6276      return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, arc_tp_regno), addr);
6277
6278    case TLS_MODEL_LOCAL_EXEC:
6279      addr = arc_unspec_offset (addr, UNSPEC_TLS_OFF);
6280      return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, arc_tp_regno), addr);
6281
6282    default:
6283      gcc_unreachable ();
6284    }
6285}
6286
6287/* Return true if SYMBOL_REF X binds locally.  */
6288
6289static bool
6290arc_symbol_binds_local_p (const_rtx x)
6291{
6292  return (SYMBOL_REF_DECL (x)
6293	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6294	  : SYMBOL_REF_LOCAL_P (x));
6295}
6296
6297/* Legitimize a pic address reference in ADDR.  The return value is
6298   the legitimized address.  */
6299
6300static rtx
6301arc_legitimize_pic_address (rtx addr)
6302{
6303  if (!flag_pic)
6304    return addr;
6305
6306  switch (GET_CODE (addr))
6307    {
6308    case UNSPEC:
6309      /* Can be one of our GOT or GOTOFFPC unspecs.  This situation
6310	 happens when an address is not a legitimate constant and we
6311	 need to resolve it via force_reg in
6312	 prepare_move_operands.  */
6313      switch (XINT (addr, 1))
6314	{
6315	case ARC_UNSPEC_GOT:
6316	case ARC_UNSPEC_GOTOFFPC:
6317	  /* Recover the symbol ref.  */
6318	  addr = XVECEXP (addr, 0, 0);
6319	  break;
6320	default:
6321	  return addr;
6322	}
6323      /* Fall through.  */
6324    case SYMBOL_REF:
6325      /* TLS symbols are handled in a different place.  */
6326      if (SYMBOL_REF_TLS_MODEL (addr))
6327	return addr;
6328
6329      /* This symbol must be referenced via a load from the Global
6330	 Offset Table (@GOTPC).  */
6331      if (!arc_symbol_binds_local_p (addr))
6332	return gen_const_mem (Pmode, arc_unspec_offset (addr, ARC_UNSPEC_GOT));
6333
6334      /* Local symbol: use @pcl to access it.  */
6335      /* Fall through.  */
6336    case LABEL_REF:
6337      return arc_unspec_offset (addr, ARC_UNSPEC_GOTOFFPC);
6338
6339    default:
6340      break;
6341    }
6342
6343 return addr;
6344}
6345
6346/* Output address constant X to FILE, taking PIC into account.  */
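/* For instance, with a hypothetical symbol `foo', the UNSPEC case below
   produces:

     ARC_UNSPEC_GOT       ->  pcl,foo@gotpc
     ARC_UNSPEC_GOTOFFPC  ->  pcl,foo@pcl
     ARC_UNSPEC_GOTOFF    ->  foo@gotoff  */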
6347
6348static void
6349arc_output_pic_addr_const (FILE * file, rtx x, int code)
6350{
6351  char buf[256];
6352
6353 restart:
6354  switch (GET_CODE (x))
6355    {
6356    case PC:
6357      if (flag_pic)
6358	putc ('.', file);
6359      else
6360	gcc_unreachable ();
6361      break;
6362
6363    case SYMBOL_REF:
6364      output_addr_const (file, x);
6365
6366      /* Local functions do not get references through the PLT.  */
6367      if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6368	fputs ("@plt", file);
6369      break;
6370
6371    case LABEL_REF:
6372      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
6373      assemble_name (file, buf);
6374      break;
6375
6376    case CODE_LABEL:
6377      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6378      assemble_name (file, buf);
6379      break;
6380
6381    case CONST_INT:
6382      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6383      break;
6384
6385    case CONST:
6386      arc_output_pic_addr_const (file, XEXP (x, 0), code);
6387      break;
6388
6389    case CONST_DOUBLE:
6390      if (GET_MODE (x) == VOIDmode)
6391	{
6392	  /* We can use %d if the number is one word and positive.  */
6393	  if (CONST_DOUBLE_HIGH (x))
6394	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
6395		     CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
6396	  else if  (CONST_DOUBLE_LOW (x) < 0)
6397	    fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
6398	  else
6399	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6400	}
6401      else
6402	/* We can't handle floating point constants;
6403	   PRINT_OPERAND must handle them.  */
6404	output_operand_lossage ("floating constant misused");
6405      break;
6406
6407    case PLUS:
6408      /* FIXME: Not needed here.  */
6409      /* Some assemblers need integer constants to appear last (eg masm).  */
6410      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6411	{
6412	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
6413	  fprintf (file, "+");
6414	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
6415	}
6416      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6417	{
6418	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
6419	  if (INTVAL (XEXP (x, 1)) >= 0)
6420	    fprintf (file, "+");
6421	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
6422	}
6423      else
6424	gcc_unreachable();
6425      break;
6426
6427    case MINUS:
6428      /* Avoid outputting things like x-x or x+5-x,
6429	 since some assemblers can't handle that.  */
6430      x = simplify_subtraction (x);
6431      if (GET_CODE (x) != MINUS)
6432	goto restart;
6433
6434      arc_output_pic_addr_const (file, XEXP (x, 0), code);
6435      fprintf (file, "-");
6436      if (GET_CODE (XEXP (x, 1)) == CONST_INT
6437	  && INTVAL (XEXP (x, 1)) < 0)
6438	{
6439	  fprintf (file, "(");
6440	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
6441	  fprintf (file, ")");
6442	}
6443      else
6444	arc_output_pic_addr_const (file, XEXP (x, 1), code);
6445      break;
6446
6447    case ZERO_EXTEND:
6448    case SIGN_EXTEND:
6449      arc_output_pic_addr_const (file, XEXP (x, 0), code);
6450      break;
6451
6452
6453    case UNSPEC:
6454      const char *suffix;
6455      bool pcrel; pcrel = false;
6456      rtx base; base = NULL;
6457      gcc_assert (XVECLEN (x, 0) >= 1);
6458      switch (XINT (x, 1))
6459	{
6460	case ARC_UNSPEC_GOT:
6461	  suffix = "@gotpc", pcrel = true;
6462	  break;
6463	case ARC_UNSPEC_GOTOFF:
6464	  suffix = "@gotoff";
6465	  break;
6466	case ARC_UNSPEC_GOTOFFPC:
6467	  suffix = "@pcl",   pcrel = true;
6468	  break;
6469	case ARC_UNSPEC_PLT:
6470	  suffix = "@plt";
6471	  break;
6472	case UNSPEC_TLS_GD:
6473	  suffix = "@tlsgd", pcrel = true;
6474	  break;
6475	case UNSPEC_TLS_IE:
6476	  suffix = "@tlsie", pcrel = true;
6477	  break;
6478	case UNSPEC_TLS_OFF:
6479	  if (XVECLEN (x, 0) == 2)
6480	    base = XVECEXP (x, 0, 1);
6481	  if (SYMBOL_REF_TLS_MODEL (XVECEXP (x, 0, 0)) == TLS_MODEL_LOCAL_EXEC
6482	      || (!flag_pic && !base))
6483	    suffix = "@tpoff";
6484	  else
6485	    suffix = "@dtpoff";
6486	  break;
6487	default:
6488	  suffix = "@invalid";
6489	  output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1));
6490	  break;
6491	}
6492      if (pcrel)
6493	fputs ("pcl,", file);
6494      arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6495      fputs (suffix, file);
6496      if (base)
6497	arc_output_pic_addr_const (file, base, code);
6498      break;
6499
6500    default:
6501      output_operand_lossage ("invalid expression as operand");
6502    }
6503}
6504
6505/* Return the number of words, at the beginning of an argument, that
6506   must be put in registers.  The returned value must be zero for
6507   arguments that are passed entirely in registers or that are
6508   entirely pushed on the stack.
6509
6510   On some machines, certain arguments must be passed partially in
6511   registers and partially in memory.  On these machines, typically
6512   the first N words of arguments are passed in registers, and the
6513   rest on the stack.  If a multi-word argument (a `double' or a
6514   structure) crosses that boundary, its first few words must be
6515   passed in registers and the rest must be pushed.  This function
6516   tells the compiler when this occurs, and how many of the words
6517   should go in registers.
6518
6519   `FUNCTION_ARG' for these arguments should return the first register
6520   to be used by the caller for this argument; likewise
6521   `FUNCTION_INCOMING_ARG', for the called function.
6522
6523   This is used to implement the TARGET_ARG_PARTIAL_BYTES hook.  */
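/* For example, assuming arguments are passed in r0-r7 (i.e.
   MAX_ARC_PARM_REGS == 8), a two-word argument whose first word lands
   in r7 has only that one register left, so arc_arg_partial_bytes
   returns UNITS_PER_WORD and the second word goes on the stack.  */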
6524
6525/* If REGNO is the lowest available argument register, give the number
6526   of argument registers that are still available.  */
6527#define GPR_REST_ARG_REGS(REGNO) \
6528  ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
6529
6530/* ARC parameter registers are contiguous, so the next one is REGNO + 1.  */
6531#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
6532
6533/* Implement TARGET_ARG_PARTIAL_BYTES.  */
6534
6535static int
6536arc_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
6537{
6538  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6539  int bytes = arg.promoted_size_in_bytes ();
6540  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6541  int arg_num = *cum;
6542  int ret;
6543
6544  arg_num = ROUND_ADVANCE_CUM (arg_num, arg.mode, arg.type);
6545  ret = GPR_REST_ARG_REGS (arg_num);
6546
6547  /* ICEd at function.c:2361, and ret is copied to data->partial.  */
6548  ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
6549
6550  return ret;
6551}
6552
6553/* Implement TARGET_FUNCTION_ARG.  On the ARC the first MAX_ARC_PARM_REGS
6554   args are normally in registers and the rest are pushed.  */
6555
6556static rtx
6557arc_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
6558{
6559  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6560  int arg_num = *cum;
6561  rtx ret;
6562  const char *debstr ATTRIBUTE_UNUSED;
6563
6564  arg_num = ROUND_ADVANCE_CUM (arg_num, arg.mode, arg.type);
6565  /* Return a marker for use in the call instruction.  */
6566  if (arg.end_marker_p ())
6567    {
6568      ret = const0_rtx;
6569      debstr = "<0>";
6570    }
6571  else if (GPR_REST_ARG_REGS (arg_num) > 0)
6572    {
6573      ret = gen_rtx_REG (arg.mode, arg_num);
6574      debstr = reg_names [arg_num];
6575    }
6576  else
6577    {
6578      ret = NULL_RTX;
6579      debstr = "memory";
6580    }
6581  return ret;
6582}
6583
6584/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */
6585/* For the ARC: the cum set here is passed on to function_arg where we
6586   look at its value and say which reg to use.  Strategy: advance the
6587   register number here till we run out of arg regs, then set *cum to
6588   the last reg.  In function_arg, since *cum > last arg reg we would
6589   return 0 and thus the arg will end up on the stack.  For straddling
6590   args, of course, function_arg_partial_nregs will come into play.  */
6591
6592static void
6593arc_function_arg_advance (cumulative_args_t cum_v,
6594			  const function_arg_info &arg)
6595{
6596  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6597  int bytes = arg.promoted_size_in_bytes ();
6598  int words = (bytes + UNITS_PER_WORD  - 1) / UNITS_PER_WORD;
6599  int i;
6600
6601  if (words)
6602    *cum = ROUND_ADVANCE_CUM (*cum, arg.mode, arg.type);
6603  for (i = 0; i < words; i++)
6604    *cum = ARC_NEXT_ARG_REG (*cum);
6606}
6607
6608/* Define how to find the value returned by a function.
6609   VALTYPE is the data type of the value (as a tree).
6610   If the precise function being called is known, FN_DECL_OR_TYPE is its
6611   FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type.  */
6612
6613static rtx
6614arc_function_value (const_tree valtype,
6615		    const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6616		    bool outgoing ATTRIBUTE_UNUSED)
6617{
6618  machine_mode mode = TYPE_MODE (valtype);
6619  int unsignedp ATTRIBUTE_UNUSED;
6620
6621  unsignedp = TYPE_UNSIGNED (valtype);
6622  if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
6623    PROMOTE_MODE (mode, unsignedp, valtype);
6624  return gen_rtx_REG (mode, 0);
6625}
6626
6627/* Returns the return address that is used by builtin_return_address.  */
6628
6629rtx
6630arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
6631{
6632  if (count != 0)
6633    return const0_rtx;
6634
6635  return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6636}
6637
6638/* Determine if a given RTX is a valid constant.  We already know this
6639   satisfies CONSTANT_P.  */
6640
6641bool
6642arc_legitimate_constant_p (machine_mode mode, rtx x)
6643{
6644  switch (GET_CODE (x))
6645    {
6646    case CONST:
6647      if (flag_pic)
6648	{
6649	  if (arc_legitimate_pic_addr_p (x))
6650	    return true;
6651	}
6652      return arc_legitimate_constant_p (mode, XEXP (x, 0));
6653
6654    case SYMBOL_REF:
6655      if (SYMBOL_REF_TLS_MODEL (x))
6656	return false;
6657      /* Fall through.  */
6658    case LABEL_REF:
6659      if (flag_pic)
6660	return false;
6661      /* Fall through.  */
6662    case CONST_INT:
6663    case CONST_DOUBLE:
6664      return true;
6665
6666    case NEG:
6667      return arc_legitimate_constant_p (mode, XEXP (x, 0));
6668
6669    case PLUS:
6670    case MINUS:
6671      {
6672	bool t1 = arc_legitimate_constant_p (mode, XEXP (x, 0));
6673	bool t2 = arc_legitimate_constant_p (mode, XEXP (x, 1));
6674
6675	return (t1 && t2);
6676      }
6677
6678    case CONST_VECTOR:
6679      switch (mode)
6680	{
6681	case E_V2HImode:
6682	  return TARGET_PLUS_DMPY;
6683	case E_V2SImode:
6684	case E_V4HImode:
6685	  return TARGET_PLUS_QMACW;
6686	default:
6687	  return false;
6688	}
6689
6690    case UNSPEC:
6691      switch (XINT (x, 1))
6692	{
6693	case UNSPEC_TLS_GD:
6694	case UNSPEC_TLS_OFF:
6695	case UNSPEC_TLS_IE:
6696	  return true;
6697	default:
6698	  /* Any other unspecs ending up here are PIC related, hence the
6699	     constant PIC address check above returned false.  */
6700	  return false;
6701	}
6702      /* Fall through.  */
6703
6704    default:
6705      fatal_insn ("unrecognized supposed constant", x);
6706    }
6707
6708  gcc_unreachable ();
6709}
6710
6711static bool
6712arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
6713{
6714  if (RTX_OK_FOR_BASE_P (x, strict))
6715     return true;
6716  if (legitimate_offset_address_p (mode, x, TARGET_INDEXED_LOADS, strict))
6717     return true;
6718  if (legitimate_scaled_address_p (mode, x, strict))
6719    return true;
6720  if (legitimate_small_data_address_p (x, mode))
6721     return true;
6722  if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
6723     return true;
6724
6725  /* When we compile for size avoid const (@sym + offset)
6726     addresses.  */
6727  if (!flag_pic && optimize_size && !reload_completed
6728      && (GET_CODE (x) == CONST)
6729      && (GET_CODE (XEXP (x, 0)) == PLUS)
6730      && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
6731      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) == 0
6732      && !SYMBOL_REF_FUNCTION_P (XEXP (XEXP (x, 0), 0)))
6733    {
6734      rtx addend = XEXP (XEXP (x, 0), 1);
6735      gcc_assert (CONST_INT_P (addend));
6736      HOST_WIDE_INT offset = INTVAL (addend);
6737
6738      /* Allow addresses having a large offset to pass.  Anyhow they
6739	 will end up in a limm.  */
6740      return !(offset > -1024 && offset < 1020);
6741    }
6742
6743  if ((GET_MODE_SIZE (mode) != 16) && CONSTANT_P (x))
6744    {
6745      return arc_legitimate_constant_p (mode, x);
6746    }
6747  if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
6748       || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
6749      && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
6750    return true;
6751  /* We're restricted here by the `st' insn.  */
6752  if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
6753      && GET_CODE (XEXP ((x), 1)) == PLUS
6754      && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
6755      && legitimate_offset_address_p (QImode, XEXP (x, 1),
6756				      TARGET_AUTO_MODIFY_REG, strict))
6757    return true;
6758  return false;
6759}
6760
6761/* Return true iff ADDR (a legitimate address expression)
6762   has an effect that depends on the machine mode it is used for.  */
6763
6764static bool
6765arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
6766{
6767  /* SYMBOL_REF is not mode dependent: it is either a small data reference,
6768     which is valid for loads and stores, or a limm offset, which is valid for
6769     loads.  Scaled indices are scaled by the access mode.  */
6770  if (GET_CODE (addr) == PLUS
6771      && GET_CODE (XEXP ((addr), 0)) == MULT)
6772    return true;
6773  return false;
6774}
6775
6776/* Determine if it's legal to put X into the constant pool.  */
6777
6778static bool
6779arc_cannot_force_const_mem (machine_mode mode, rtx x)
6780{
6781  return !arc_legitimate_constant_p (mode, x);
6782}
6783
6784/* IDs for all the ARC builtins.  */
6785
6786enum arc_builtin_id
6787  {
6788#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)	\
6789    ARC_BUILTIN_ ## NAME,
6790#include "builtins.def"
6791#undef DEF_BUILTIN
6792
6793    ARC_BUILTIN_COUNT
6794  };
6795
6796struct GTY(()) arc_builtin_description
6797{
6798  enum insn_code icode;
6799  int n_args;
6800  tree fndecl;
6801};
6802
6803static GTY(()) struct arc_builtin_description
6804arc_bdesc[ARC_BUILTIN_COUNT] =
6805{
6806#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)		\
6807  { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE },
6808#include "builtins.def"
6809#undef DEF_BUILTIN
6810};
6811
6812/* Transform UP into lowercase and write the result to LO.
6813   You must provide enough space for LO.  Return LO.  */
6814
6815static char*
6816arc_tolower (char *lo, const char *up)
6817{
6818  char *lo0 = lo;
6819
6820  for (; *up; up++, lo++)
6821    *lo = TOLOWER (*up);
6822
6823  *lo = '\0';
6824
6825  return lo0;
6826}
6827
6828/* Implement `TARGET_BUILTIN_DECL'.  */
6829
6830static tree
6831arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
6832{
6833  if (id < ARC_BUILTIN_COUNT)
6834    return arc_bdesc[id].fndecl;
6835
6836  return error_mark_node;
6837}
6838
6839static void
6840arc_init_builtins (void)
6841{
6842  tree V4HI_type_node;
6843  tree V2SI_type_node;
6844  tree V2HI_type_node;
6845
6846  /* Vector types based on HS SIMD elements.  */
6847  V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
6848  V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
6849  V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
6850
6851  tree pcvoid_type_node
6852    = build_pointer_type (build_qualified_type (void_type_node,
6853						TYPE_QUAL_CONST));
6854  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node,
6855						    V8HImode);
6856
6857  tree void_ftype_void
6858    = build_function_type_list (void_type_node, NULL_TREE);
6859  tree int_ftype_int
6860    = build_function_type_list (integer_type_node, integer_type_node,
6861				NULL_TREE);
6862  tree int_ftype_pcvoid_int
6863    = build_function_type_list (integer_type_node, pcvoid_type_node,
6864				integer_type_node, NULL_TREE);
6865  tree void_ftype_usint_usint
6866    = build_function_type_list (void_type_node, long_unsigned_type_node,
6867				long_unsigned_type_node, NULL_TREE);
6868  tree int_ftype_int_int
6869    = build_function_type_list (integer_type_node, integer_type_node,
6870				integer_type_node, NULL_TREE);
6871  tree usint_ftype_usint
6872    = build_function_type_list  (long_unsigned_type_node,
6873				 long_unsigned_type_node, NULL_TREE);
6874  tree void_ftype_usint
6875    = build_function_type_list (void_type_node, long_unsigned_type_node,
6876				NULL_TREE);
6877  tree int_ftype_void
6878    = build_function_type_list (integer_type_node, void_type_node,
6879				NULL_TREE);
6880  tree void_ftype_int
6881    = build_function_type_list (void_type_node, integer_type_node,
6882				NULL_TREE);
6883  tree int_ftype_short
6884    = build_function_type_list (integer_type_node, short_integer_type_node,
6885				NULL_TREE);
6886
6887  /* Old ARC SIMD types.  */
6888  tree v8hi_ftype_v8hi_v8hi
6889    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6890				V8HI_type_node, NULL_TREE);
6891  tree v8hi_ftype_v8hi_int
6892    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6893				integer_type_node, NULL_TREE);
6894  tree v8hi_ftype_v8hi_int_int
6895    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6896				integer_type_node, integer_type_node,
6897				NULL_TREE);
6898  tree void_ftype_v8hi_int_int
6899    = build_function_type_list (void_type_node, V8HI_type_node,
6900				integer_type_node, integer_type_node,
6901				NULL_TREE);
6902  tree void_ftype_v8hi_int_int_int
6903    = build_function_type_list (void_type_node, V8HI_type_node,
6904				integer_type_node, integer_type_node,
6905				integer_type_node, NULL_TREE);
6906  tree v8hi_ftype_int_int
6907    = build_function_type_list (V8HI_type_node, integer_type_node,
6908				integer_type_node, NULL_TREE);
6909  tree void_ftype_int_int
6910    = build_function_type_list (void_type_node, integer_type_node,
6911				integer_type_node, NULL_TREE);
6912  tree v8hi_ftype_v8hi
6913    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6914				NULL_TREE);
6915  /* ARCv2 SIMD types.  */
6916  tree long_ftype_v4hi_v4hi
6917    = build_function_type_list (long_long_integer_type_node,
6918				V4HI_type_node,	V4HI_type_node, NULL_TREE);
6919  tree int_ftype_v2hi_v2hi
6920    = build_function_type_list (integer_type_node,
6921				V2HI_type_node, V2HI_type_node, NULL_TREE);
6922  tree v2si_ftype_v2hi_v2hi
6923    = build_function_type_list (V2SI_type_node,
6924				V2HI_type_node, V2HI_type_node, NULL_TREE);
6925  tree v2hi_ftype_v2hi_v2hi
6926    = build_function_type_list (V2HI_type_node,
6927				V2HI_type_node, V2HI_type_node, NULL_TREE);
6928  tree v2si_ftype_v2si_v2si
6929    = build_function_type_list (V2SI_type_node,
6930				V2SI_type_node, V2SI_type_node, NULL_TREE);
6931  tree v4hi_ftype_v4hi_v4hi
6932    = build_function_type_list (V4HI_type_node,
6933				V4HI_type_node, V4HI_type_node, NULL_TREE);
6934  tree long_ftype_v2si_v2hi
6935    = build_function_type_list (long_long_integer_type_node,
6936				V2SI_type_node, V2HI_type_node, NULL_TREE);
6937
6938  /* Add the builtins.  */
6939#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)			\
6940  {									\
6941    int id = ARC_BUILTIN_ ## NAME;					\
6942    const char *Name = "__builtin_arc_" #NAME;				\
6943    char *name = (char*) alloca (1 + strlen (Name));			\
6944									\
6945    gcc_assert (id < ARC_BUILTIN_COUNT);				\
6946    if (MASK)								\
6947      arc_bdesc[id].fndecl						\
6948	= add_builtin_function (arc_tolower(name, Name), TYPE, id,	\
6949				BUILT_IN_MD, NULL, NULL_TREE);		\
6950  }
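/* For example, the builtins.def entry for NOP expands (via the #include
   below) into a registration of __builtin_arc_nop under the id
   ARC_BUILTIN_NOP, provided its MASK condition holds.  */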
6951#include "builtins.def"
6952#undef DEF_BUILTIN
6953}
6954
6955/* Helper to expand __builtin_arc_aligned (void* val, int
6956  alignval).  */
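/* For instance, __builtin_arc_aligned (p, 4) expands to 1 when P can be
   proven to be at least 4-byte aligned, and to 0 otherwise; a non-constant
   alignment argument only produces a warning when optimizing.  */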
6957
6958static rtx
6959arc_expand_builtin_aligned (tree exp)
6960{
6961  tree arg0 = CALL_EXPR_ARG (exp, 0);
6962  tree arg1 = CALL_EXPR_ARG (exp, 1);
6963  fold (arg1);
6964  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6965  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6966
6967  if (!CONST_INT_P (op1))
6968    {
6969      /* If we can't fold the alignment to a constant integer
6970	 whilst optimizing, this is probably a user error.  */
6971      if (optimize)
6972	warning (0, "%<__builtin_arc_aligned%> with non-constant alignment");
6973    }
6974  else
6975    {
6976      HOST_WIDE_INT alignTest = INTVAL (op1);
6977      /* Check alignTest is positive, and a power of two.  */
6978      if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
6979	{
6980	  error ("invalid alignment value for %<__builtin_arc_aligned%>");
6981	  return NULL_RTX;
6982	}
6983
6984      if (CONST_INT_P (op0))
6985	{
6986	  HOST_WIDE_INT pnt = INTVAL (op0);
6987
6988	  if ((pnt & (alignTest - 1)) == 0)
6989	    return const1_rtx;
6990	}
6991      else
6992	{
6993	  unsigned  align = get_pointer_alignment (arg0);
6994	  unsigned  numBits = alignTest * BITS_PER_UNIT;
6995
6996	  if (align && align >= numBits)
6997	    return const1_rtx;
6998	  /* Another attempt to ascertain alignment.  Check the type
6999	     we are pointing to.  */
7000	  if (POINTER_TYPE_P (TREE_TYPE (arg0))
7001	      && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
7002	    return const1_rtx;
7003	}
7004    }
7005
7006  /* Default to false.  */
7007  return const0_rtx;
7008}
7009
7010/* Helper for arc_expand_builtin; generates a pattern for the given icode
7011   and arguments.  */
7012
7013static rtx_insn *
7014apply_GEN_FCN (enum insn_code icode, rtx *arg)
7015{
7016  switch (insn_data[icode].n_generator_args)
7017    {
7018    case 0:
7019      return GEN_FCN (icode) ();
7020    case 1:
7021      return GEN_FCN (icode) (arg[0]);
7022    case 2:
7023      return GEN_FCN (icode) (arg[0], arg[1]);
7024    case 3:
7025      return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
7026    case 4:
7027      return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
7028    case 5:
7029      return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
7030    default:
7031      gcc_unreachable ();
7032    }
7033}
7034
7035/* Expand an expression EXP that calls a built-in function,
7036   with result going to TARGET if that's convenient
7037   (and in mode MODE if that's convenient).
7038   SUBTARGET may be used as the target for computing one of EXP's operands.
7039   IGNORE is nonzero if the value is to be ignored.  */
7040
7041static rtx
7042arc_expand_builtin (tree exp,
7043		    rtx target,
7044		    rtx subtarget ATTRIBUTE_UNUSED,
7045		    machine_mode mode ATTRIBUTE_UNUSED,
7046		    int ignore ATTRIBUTE_UNUSED)
7047{
7048  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
7049  unsigned int id = DECL_MD_FUNCTION_CODE (fndecl);
7050  const struct arc_builtin_description *d = &arc_bdesc[id];
7051  int i, j, n_args = call_expr_nargs (exp);
7052  rtx pat = NULL_RTX;
7053  rtx xop[5];
7054  enum insn_code icode = d->icode;
7055  machine_mode tmode = insn_data[icode].operand[0].mode;
7056  int nonvoid;
7057  tree arg0;
7058  tree arg1;
7059  tree arg2;
7060  tree arg3;
7061  rtx op0;
7062  rtx op1;
7063  rtx op2;
7064  rtx op3;
7065  rtx op4;
7066  machine_mode mode0;
7067  machine_mode mode1;
7068  machine_mode mode2;
7069  machine_mode mode3;
7070  machine_mode mode4;
7071
7072  if (id >= ARC_BUILTIN_COUNT)
7073    internal_error ("bad builtin fcode");
7074
7075  /* 1st part: Expand special builtins.  */
7076  switch (id)
7077    {
7078    case ARC_BUILTIN_NOP:
7079      emit_insn (gen_nopv ());
7080      return NULL_RTX;
7081
7082    case ARC_BUILTIN_RTIE:
7083    case ARC_BUILTIN_SYNC:
7084    case ARC_BUILTIN_BRK:
7085    case ARC_BUILTIN_SWI:
7086    case ARC_BUILTIN_UNIMP_S:
7087      gcc_assert (icode != 0);
7088      emit_insn (GEN_FCN (icode) (const1_rtx));
7089      return NULL_RTX;
7090
7091    case ARC_BUILTIN_ALIGNED:
7092      return arc_expand_builtin_aligned (exp);
7093
7094    case ARC_BUILTIN_CLRI:
7095      target = gen_reg_rtx (SImode);
7096      emit_insn (gen_clri (target, const1_rtx));
7097      return target;
7098
7099    case ARC_BUILTIN_TRAP_S:
7100    case ARC_BUILTIN_SLEEP:
7101      arg0 = CALL_EXPR_ARG (exp, 0);
7102      fold (arg0);
7103      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
7104
7105      gcc_assert (icode != 0);
7106      emit_insn (GEN_FCN (icode) (op0));
7107      return NULL_RTX;
7108
7109    case ARC_BUILTIN_VDORUN:
7110    case ARC_BUILTIN_VDIRUN:
7111      arg0 = CALL_EXPR_ARG (exp, 0);
7112      arg1 = CALL_EXPR_ARG (exp, 1);
7113      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7114      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7115
7116      target = gen_rtx_REG (SImode, (id == ARC_BUILTIN_VDIRUN) ? 131 : 139);
7117
7118      mode0 =  insn_data[icode].operand[1].mode;
7119      mode1 =  insn_data[icode].operand[2].mode;
7120
7121      if (!insn_data[icode].operand[1].predicate (op0, mode0))
7122	op0 = copy_to_mode_reg (mode0, op0);
7123
7124      if (!insn_data[icode].operand[2].predicate (op1, mode1))
7125	op1 = copy_to_mode_reg (mode1, op1);
7126
7127      pat = GEN_FCN (icode) (target, op0, op1);
7128      if (!pat)
7129	return NULL_RTX;
7130
7131      emit_insn (pat);
7132      return NULL_RTX;
7133
7134    case ARC_BUILTIN_VDIWR:
7135    case ARC_BUILTIN_VDOWR:
7136      arg0 = CALL_EXPR_ARG (exp, 0);
7137      arg1 = CALL_EXPR_ARG (exp, 1);
7138      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7139      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7140
7141      if (!CONST_INT_P (op0)
7142	  || !(UNSIGNED_INT3 (INTVAL (op0))))
7143	error ("operand 1 should be an unsigned 3-bit immediate");
7144
7145      mode1 =  insn_data[icode].operand[1].mode;
7146
7147      if (icode == CODE_FOR_vdiwr_insn)
7148	target = gen_rtx_REG (SImode,
7149			      ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
7150      else if (icode == CODE_FOR_vdowr_insn)
7151	target = gen_rtx_REG (SImode,
7152			      ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
7153      else
7154	gcc_unreachable ();
7155
7156      if (!insn_data[icode].operand[2].predicate (op1, mode1))
7157	op1 = copy_to_mode_reg (mode1, op1);
7158
7159      pat = GEN_FCN (icode) (target, op1);
7160      if (!pat)
7161	return NULL_RTX;
7162
7163      emit_insn (pat);
7164      return NULL_RTX;
7165
7166    case ARC_BUILTIN_VASRW:
7167    case ARC_BUILTIN_VSR8:
7168    case ARC_BUILTIN_VSR8AW:
7169      arg0 = CALL_EXPR_ARG (exp, 0);
7170      arg1 = CALL_EXPR_ARG (exp, 1);
7171      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7172      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7173      op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7174
7175      target = gen_reg_rtx (V8HImode);
7176      mode0 =  insn_data[icode].operand[1].mode;
7177      mode1 =  insn_data[icode].operand[2].mode;
7178
7179      if (!insn_data[icode].operand[1].predicate (op0, mode0))
7180	op0 = copy_to_mode_reg (mode0, op0);
7181
7182      if ((!insn_data[icode].operand[2].predicate (op1, mode1))
7183	  || !(UNSIGNED_INT3 (INTVAL (op1))))
7184	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
7185
7186      pat = GEN_FCN (icode) (target, op0, op1, op2);
7187      if (!pat)
7188	return NULL_RTX;
7189
7190      emit_insn (pat);
7191      return target;
7192
7193    case ARC_BUILTIN_VLD32WH:
7194    case ARC_BUILTIN_VLD32WL:
7195    case ARC_BUILTIN_VLD64:
7196    case ARC_BUILTIN_VLD32:
7197      rtx src_vreg;
7198      icode = d->icode;
7199      arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
7200      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
7201      arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
7202
7203      src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7204      op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7205      op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
7206      op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7207
7208      /* target <- src vreg.  */
7209      emit_insn (gen_move_insn (target, src_vreg));
7210
7211      /* target <- vec_concat: target, mem (Ib, u8).  */
7212      mode0 =  insn_data[icode].operand[3].mode;
7213      mode1 =  insn_data[icode].operand[1].mode;
7214
7215      if ((!insn_data[icode].operand[3].predicate (op0, mode0))
7216	  || !(UNSIGNED_INT3 (INTVAL (op0))))
7217	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
7218
7219      if ((!insn_data[icode].operand[1].predicate (op1, mode1))
7220	  || !(UNSIGNED_INT8 (INTVAL (op1))))
7221	error ("operand 2 should be an unsigned 8-bit value");
7222
7223      pat = GEN_FCN (icode) (target, op1, op2, op0);
7224      if (!pat)
7225	return NULL_RTX;
7226
7227      emit_insn (pat);
7228      return target;
7229
7230    case ARC_BUILTIN_VLD64W:
7231    case ARC_BUILTIN_VLD128:
7232      arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg.  */
7233      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
7234
7235      op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7236      op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7237      op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7238
7239      /* target <- src vreg.  */
7240      target = gen_reg_rtx (V8HImode);
7241
7242      /* target <- vec_concat: target, mem (Ib, u8).  */
7243      mode0 =  insn_data[icode].operand[1].mode;
7244      mode1 =  insn_data[icode].operand[2].mode;
7245      mode2 =  insn_data[icode].operand[3].mode;
7246
7247      if ((!insn_data[icode].operand[2].predicate (op1, mode1))
7248	  || !(UNSIGNED_INT3 (INTVAL (op1))))
7249	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
7250
7251      if ((!insn_data[icode].operand[3].predicate (op2, mode2))
7252	  || !(UNSIGNED_INT8 (INTVAL (op2))))
7253	error ("operand 2 should be an unsigned 8-bit value");
7254
7255      pat = GEN_FCN (icode) (target, op0, op1, op2);
7256
7257      if (!pat)
7258	return NULL_RTX;
7259
7260      emit_insn (pat);
7261      return target;
7262
7263    case ARC_BUILTIN_VST128:
7264    case ARC_BUILTIN_VST64:
7265      arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg.  */
7266      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
7267      arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
7268
7269      op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7270      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7271      op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
7272      op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7273
7274      mode0 = insn_data[icode].operand[0].mode;
7275      mode1 = insn_data[icode].operand[1].mode;
7276      mode2 = insn_data[icode].operand[2].mode;
7277      mode3 = insn_data[icode].operand[3].mode;
7278
7279      if ((!insn_data[icode].operand[1].predicate (op1, mode1))
7280	  || !(UNSIGNED_INT3 (INTVAL (op1))))
7281	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
7282
7283      if ((!insn_data[icode].operand[2].predicate (op2, mode2))
7284	  || !(UNSIGNED_INT8 (INTVAL (op2))))
7285	error ("operand 3 should be an unsigned 8-bit value");
7286
7287      if (!insn_data[icode].operand[3].predicate (op3, mode3))
7288	op3 = copy_to_mode_reg (mode3, op3);
7289
7290      pat = GEN_FCN (icode) (op0, op1, op2, op3);
7291      if (!pat)
7292	return NULL_RTX;
7293
7294      emit_insn (pat);
7295      return NULL_RTX;
7296
7297    case ARC_BUILTIN_VST16_N:
7298    case ARC_BUILTIN_VST32_N:
7299      arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
7300      arg1 = CALL_EXPR_ARG (exp, 1); /* u3.  */
7301      arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7.  */
7302      arg3 = CALL_EXPR_ARG (exp, 3); /* u8.  */
7303
7304      op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL);
7305      op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7306      op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
7307      op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7308      op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7309
7310      mode0 = insn_data[icode].operand[0].mode;
7311      mode2 = insn_data[icode].operand[2].mode;
7312      mode3 = insn_data[icode].operand[3].mode;
7313      mode4 = insn_data[icode].operand[4].mode;
7314
7315      /* Do some correctness checks for the operands.  */
7316      if ((!insn_data[icode].operand[0].predicate (op0, mode0))
7317	  || !(UNSIGNED_INT8 (INTVAL (op0))))
7318	error ("operand 4 should be an unsigned 8-bit value (0-255)");
7319
7320      if ((!insn_data[icode].operand[2].predicate (op2, mode2))
7321	  || !(UNSIGNED_INT3 (INTVAL (op2))))
7322	error ("operand 3 should be an unsigned 3-bit value (I0-I7)");
7323
7324      if (!insn_data[icode].operand[3].predicate (op3, mode3))
7325	op3 = copy_to_mode_reg (mode3, op3);
7326
7327      if ((!insn_data[icode].operand[4].predicate (op4, mode4))
7328	   || !(UNSIGNED_INT3 (INTVAL (op4))))
7329	error ("operand 2 should be an unsigned 3-bit value (subreg 0-7)");
7330      else if (icode == CODE_FOR_vst32_n_insn
7331	       && ((INTVAL (op4) % 2) != 0))
7332	error ("operand 2 should be an even 3-bit value (subreg 0,2,4,6)");
7333
7334      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
7335      if (!pat)
7336	return NULL_RTX;
7337
7338      emit_insn (pat);
7339      return NULL_RTX;
7340
7341    default:
7342      break;
7343    }
7344
7345  /* 2nd part: Expand regular builtins.  */
7346  if (icode == 0)
7347    internal_error ("bad builtin fcode");
7348
7349  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
7350  j = 0;
7351
7352  if (nonvoid)
7353    {
7354      if (target == NULL_RTX
7355	  || GET_MODE (target) != tmode
7356	  || !insn_data[icode].operand[0].predicate (target, tmode))
7357	{
7358	  target = gen_reg_rtx (tmode);
7359	}
7360      xop[j++] = target;
7361    }
7362
7363  gcc_assert (n_args <= 4);
7364  for (i = 0; i < n_args; i++, j++)
7365    {
7366      tree arg = CALL_EXPR_ARG (exp, i);
7367      machine_mode mode = insn_data[icode].operand[j].mode;
7368      rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
7369      machine_mode opmode = GET_MODE (op);
7370      char c = insn_data[icode].operand[j].constraint[0];
7371
7372      /* SIMD extension requires exact immediate operand match.  */
7373      if ((id > ARC_BUILTIN_SIMD_BEGIN)
7374	  && (id < ARC_BUILTIN_SIMD_END)
7375	  && (c != 'v')
7376	  && (c != 'r'))
7377	{
7378	  if (!CONST_INT_P (op))
7379	    error ("builtin requires an immediate for operand %d", j);
7380	  switch (c)
7381	    {
7382	    case 'L':
7383	      if (!satisfies_constraint_L (op))
7384		error ("operand %d should be a 6 bit unsigned immediate", j);
7385	      break;
7386	    case 'P':
7387	      if (!satisfies_constraint_P (op))
7388		error ("operand %d should be an 8 bit unsigned immediate", j);
7389	      break;
7390	    case 'K':
7391	      if (!satisfies_constraint_K (op))
7392		error ("operand %d should be a 3 bit unsigned immediate", j);
7393	      break;
7394	    default:
7395	      error ("unknown builtin immediate operand type for operand %d",
7396		     j);
7397	    }
7398	}
7399
7400      if (CONST_INT_P (op))
7401	opmode = mode;
7402
7403      if ((opmode == SImode) && (mode == HImode))
7404	{
7405	  opmode = HImode;
7406	  op = gen_lowpart (HImode, op);
7407	}
7408
7409      /* In case the insn wants input operands in modes different from
7410	 the result, abort.  */
7411      gcc_assert (opmode == mode || opmode == VOIDmode);
7412
7413      if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
7414	op = copy_to_mode_reg (mode, op);
7415
7416      xop[j] = op;
7417    }
7418
7419  pat = apply_GEN_FCN (icode, xop);
7420  if (pat == NULL_RTX)
7421    return NULL_RTX;
7422
7423  emit_insn (pat);
7424
7425  if (nonvoid)
7426    return target;
7427  else
7428    return const0_rtx;
7429}
7430
7431/* Return true if operands[opno] is a valid compile-time constant to be
7432   used as a register number in builtin code.  Otherwise flag an error
7433   and return false.  */
7434
7435bool
7436check_if_valid_regno_const (rtx *operands, int opno)
7437{
7438
7439  switch (GET_CODE (operands[opno]))
7440    {
7441    case SYMBOL_REF :
7442    case CONST :
7443    case CONST_INT :
7444      return true;
7445    default:
7446	error ("register number must be a compile-time constant.  "
7447	       "Try giving higher optimization levels");
7448	break;
7449    }
7450  return false;
7451}
7452
7453/* Return true if it is ok to make a tail-call to DECL.  */
7454
7455static bool
7456arc_function_ok_for_sibcall (tree decl,
7457			     tree exp ATTRIBUTE_UNUSED)
7458{
7459  tree attrs = NULL_TREE;
7460
7461  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7462  if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
7463    return false;
7464
7465  if (decl)
7466    {
7467      attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7468
7469      if (lookup_attribute ("jli_always", attrs))
7470	return false;
7471      if (lookup_attribute ("jli_fixed", attrs))
7472	return false;
7473      if (lookup_attribute ("secure_call", attrs))
7474	return false;
7475    }
7476
7477  /* Everything else is ok.  */
7478  return true;
7479}
7480
7481/* Output code to add DELTA to the first argument, and then jump
7482   to FUNCTION.  Used for C++ multiple inheritance.  */
7483
7484static void
7485arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7486		     HOST_WIDE_INT delta,
7487		     HOST_WIDE_INT vcall_offset,
7488		     tree function)
7489{
7490  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
7491  int mi_delta = delta;
7492  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
7493  int shift = 0;
7494  int this_regno
7495    = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
7496  rtx fnaddr;
7497
7498  assemble_start_function (thunk, fnname);
7499
7500  if (mi_delta < 0)
7501    mi_delta = - mi_delta;
7502
7503  /* Add DELTA.  Emit it in 8-bit chunks, shifted as needed, so that
7504     each add uses a small immediate.  */
7505
7506  while (mi_delta != 0)
7507    {
7508      if ((mi_delta & (3 << shift)) == 0)
7509	shift += 2;
7510      else
7511	{
7512	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
7513		       mi_op, reg_names[this_regno], reg_names[this_regno],
7514		       mi_delta & (0xff << shift));
7515	  mi_delta &= ~(0xff << shift);
7516	  shift += 8;
7517	}
7518    }
7519
7520  /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
7521  if (vcall_offset != 0)
7522    {
7523      /* ld  r12,[this]           --> temp = *this
7524	 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
7525	 ld r12,[r12]
7526	 add this,this,r12        --> this+ = *(*this + vcall_offset) */
7527      asm_fprintf (file, "\tld\t%s, [%s]\n",
7528		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
7529      asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
7530		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
7531      asm_fprintf (file, "\tld\t%s, [%s]\n",
7532		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
7533      asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
7534		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
7535    }
7536
7537  fnaddr = XEXP (DECL_RTL (function), 0);
7538
7539  if (arc_is_longcall_p (fnaddr))
7540    {
7541      if (flag_pic)
7542	{
7543	  asm_fprintf (file, "\tld\t%s, [pcl, @",
7544		       ARC_TEMP_SCRATCH_REG);
7545	  assemble_name (file, XSTR (fnaddr, 0));
7546	  fputs ("@gotpc]\n", file);
7547	  asm_fprintf (file, "\tj\t[%s]", ARC_TEMP_SCRATCH_REG);
7548	}
7549      else
7550	{
7551	  fputs ("\tj\t@", file);
7552	  assemble_name (file, XSTR (fnaddr, 0));
7553	}
7554    }
7555  else
7556    {
7557      fputs ("\tb\t@", file);
7558      assemble_name (file, XSTR (fnaddr, 0));
7559      if (flag_pic)
7560	fputs ("@plt\n", file);
7561    }
7562  fputc ('\n', file);
7563  assemble_end_function (thunk, fnname);
7564}
7565
7566/* Return true if a 32 bit "long_call" should be generated for a call
7567   to this SYM_REF.  We generate a long_call if the function:
7568
7569        a.  has an __attribute__ ((long_call))
7570     or b.  the -mlong-calls command line switch has been specified
7571
7572   However we do not generate a long call if the function has an
7573   __attribute__ ((short_call)) or __attribute__ ((medium_call))
7574
7575   This function will be called by C fragments contained in the machine
7576   description file.  */
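/* For example, a function declared with __attribute__ ((long_call)) is
   always reached through a long call, whereas -mlong-calls makes long calls
   the default for functions that carry neither a short_call nor a
   medium_call attribute.  */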
7577
7578bool
7579arc_is_longcall_p (rtx sym_ref)
7580{
7581  if (GET_CODE (sym_ref) != SYMBOL_REF)
7582    return false;
7583
7584  return (SYMBOL_REF_LONG_CALL_P (sym_ref)
7585	  || (TARGET_LONG_CALLS_SET
7586	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
7587	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
7588
7589}
7590
7591/* Likewise for short calls.  */
7592
7593bool
7594arc_is_shortcall_p (rtx sym_ref)
7595{
7596  if (GET_CODE (sym_ref) != SYMBOL_REF)
7597    return false;
7598
7599  return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
7600	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
7601	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
7602	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
7603
7604}
7605
7606/* Worker function for TARGET_RETURN_IN_MEMORY.  */
7607
7608static bool
7609arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7610{
7611  if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
7612    return true;
7613  else
7614    {
7615      HOST_WIDE_INT size = int_size_in_bytes (type);
7616      return (size == -1 || size > (TARGET_V2 ? 16 : 8));
7617    }
7618}
7619
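/* Return true if an argument described by ARG must be passed by reference:
   that is, if its size is not a compile-time constant or its type is
   addressable.  */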
7620static bool
7621arc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7622{
7623  return (arg.type != 0
7624	  && (TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST
7625	      || TREE_ADDRESSABLE (arg.type)));
7626}
7627
7628/* Implement TARGET_CAN_USE_DOLOOP_P.  */
7629
7630static bool
7631arc_can_use_doloop_p (const widest_int &,
7632		      const widest_int &iterations_max,
7633		      unsigned int loop_depth, bool entered_at_top)
7634{
7635  /* Considering limitations in the hardware, only use doloop
7636     for innermost loops which must be entered from the top.  */
7637  if (loop_depth > 1 || !entered_at_top)
7638    return false;
7639
7640  /* Check for lp_count width boundary.  */
7641  if (arc_lpcwidth != 32
7642      && (wi::gtu_p (iterations_max, ((1 << arc_lpcwidth) - 1))
7643	  || wi::eq_p (iterations_max, 0)))
7644    return false;
7645  return true;
7646}
7647
7648/* Return NULL if INSN is valid within a low-overhead loop.  Otherwise
7649   return a string describing why doloop cannot be applied.  */
7650
7651static const char *
7652arc_invalid_within_doloop (const rtx_insn *insn)
7653{
7654  if (CALL_P (insn))
7655    return "Function call in the loop.";
7656
7657  /* FIXME! add here all the ZOL exceptions.  */
7658  return NULL;
7659}
7660
7661/* Return the next active insn, skipping any inline assembly code.  */
7662
7663static rtx_insn *
7664arc_active_insn (rtx_insn *insn)
7665{
7666  rtx_insn *nxt = next_active_insn (insn);
7667
7668  if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT)
7669    nxt = next_active_insn (nxt);
7670  return nxt;
7671}
7672
7673/* Search for sequences of two or more consecutive stores followed
7674   closely by a load, and insert nops between them when required.  */
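/* Schematically, for a sequence like

       st  r0,[r10]
       st  r1,[r11]
       ld  r2,[r12]

   two nops are inserted before the load; if one non-store, non-load
   instruction separates the store sequence from the load, a single nop
   is inserted before the load instead.  */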
7675
7676static void
7677check_store_cacheline_hazard (void)
7678{
7679  rtx_insn *insn, *succ0, *insn1;
7680  bool found = false;
7681
7682  for (insn = get_insns (); insn; insn = arc_active_insn (insn))
7683    {
7684      succ0 = arc_active_insn (insn);
7685
7686      if (!succ0)
7687	return;
7688
7689      if (!single_set (insn) || !single_set (succ0))
7690	continue;
7691
7692      if ((get_attr_type (insn) != TYPE_STORE)
7693	  || (get_attr_type (succ0) != TYPE_STORE))
7694	continue;
7695
7696      /* Found at least two consecutive stores.  Goto the end of the
7697	 store sequence.  */
7698      for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
7699	if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
7700	  break;
7701
7702      /* Now, check the next two instructions for the following cases:
7703         1. next instruction is a LD => insert 2 nops between store
7704	    sequence and load.
7705	 2. next-next instruction is a LD => insert 1 nop after the store
7706	    sequence.  */
7707      if (insn1 && single_set (insn1)
7708	  && (get_attr_type (insn1) == TYPE_LOAD))
7709	{
7710	  found = true;
7711	  emit_insn_before (gen_nopv (), insn1);
7712	  emit_insn_before (gen_nopv (), insn1);
7713	}
7714      else
7715	{
7716	  if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE))
7717	    {
7718	      /* REG_SAVE_NOTE is used by the Haifa scheduler; we are in
7719		 reorg, so it is safe to reuse it to prevent the current
7720		 compare insn from becoming part of a BRcc
7721		 optimization.  */
7722	      add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
7723	    }
7724	  insn1 = arc_active_insn (insn1);
7725	  if (insn1 && single_set (insn1)
7726	      && (get_attr_type (insn1) == TYPE_LOAD))
7727	    {
7728	      found = true;
7729	      emit_insn_before (gen_nopv (), insn1);
7730	    }
7731	}
7732
7733      insn = insn1;
7734      if (found)
7735	found = false;
7736    }
7737}
7738
7739/* Return true if a load instruction (CONSUMER) uses the same address as a
7740   store instruction (PRODUCER).  This function is used to avoid st/ld
7741   address hazard in ARC700 cores.  */
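/* For example, a store "st r1,[r2]" followed by a load "ld r3,[r2]" uses
   the same address, so this function returns true for that
   producer/consumer pair.  */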
7742
7743static bool
7744arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
7745{
7746  rtx in_set, out_set;
7747  rtx out_addr, in_addr;
7748
7749  if (!producer)
7750    return false;
7751
7752  if (!consumer)
7753    return false;
7754
7755  /* Peel the producer and the consumer for the address.  */
7756  out_set = single_set (producer);
7757  if (out_set)
7758    {
7759      out_addr = SET_DEST (out_set);
7760      if (!out_addr)
7761	return false;
7762      if (GET_CODE (out_addr) == ZERO_EXTEND
7763	  || GET_CODE (out_addr) == SIGN_EXTEND)
7764	out_addr = XEXP (out_addr, 0);
7765
7766      if (!MEM_P (out_addr))
7767	return false;
7768
7769      in_set = single_set (consumer);
7770      if (in_set)
7771	{
7772	  in_addr = SET_SRC (in_set);
7773	  if (!in_addr)
7774	    return false;
7775	  if (GET_CODE (in_addr) == ZERO_EXTEND
7776	      || GET_CODE (in_addr) == SIGN_EXTEND)
7777	    in_addr = XEXP (in_addr, 0);
7778
7779	  if (!MEM_P (in_addr))
7780	    return false;
7781	  /* Get rid of the MEM and check if the addresses are
7782	     equivalent.  */
7783	  in_addr = XEXP (in_addr, 0);
7784	  out_addr = XEXP (out_addr, 0);
7785
7786	  return exp_equiv_p (in_addr, out_addr, 0, true);
7787	}
7788    }
7789  return false;
7790}
7791
7792/* Return TRUE if we have a store address hazard.  */
7793
7794bool
7795arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
7796{
7797  if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
7798    return true;
7799  return arc_store_addr_hazard_internal_p (producer, consumer);
7800}
7801
7802/* The same functionality as arc_hazard.  It is called in machine
7803   reorg before any other optimization.  Hence, the NOP size is taken
7804   into account when doing branch shortening.  */
7805
7806static void
7807workaround_arc_anomaly (void)
7808{
7809  rtx_insn *insn, *succ0;
7810  rtx_insn *succ1;
7811
7812  /* For any architecture: call arc_hazard here.  */
7813  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7814    {
7815      succ0 = next_real_insn (insn);
7816      if (arc_hazard (insn, succ0))
7817	{
7818	  emit_insn_before (gen_nopv (), succ0);
7819	}
7820    }
7821
7822  if (!TARGET_ARC700)
7823    return;
7824
7825  /* Old A7 cores suffer from a cache hazard, and we need to insert two
7826     nops between any sequence of stores and a load.  */
7827  if (arc_tune != ARC_TUNE_ARC7XX)
7828    check_store_cacheline_hazard ();
7829
7830  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7831    {
7832      succ0 = next_real_insn (insn);
7833      if (arc_store_addr_hazard_internal_p (insn, succ0))
7834	{
7835	  emit_insn_after (gen_nopv (), insn);
7836	  emit_insn_after (gen_nopv (), insn);
7837	  continue;
7838	}
7839
7840      /* Avoid adding nops if the instruction between the ST and LD is
7841	 a call or jump.  */
7842      succ1 = next_real_insn (succ0);
7843      if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
7844	  && arc_store_addr_hazard_internal_p (insn, succ1))
7845	emit_insn_after (gen_nopv (), insn);
7846    }
7847}
7848
7849/* A callback for the hw-doloop pass.  Called when a loop we have discovered
7850   turns out not to be optimizable; we have to split the loop_end pattern into
7851   a subtract and a test.  */
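/* Depending on the target and the loop, the replacement is either a dbnz
   instruction, the loop_fail pattern followed by a conditional branch on
   the condition codes, or an add of -1 followed by a compare-and-branch
   against zero.  */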
7852
7853static void
7854hwloop_fail (hwloop_info loop)
7855{
7856  rtx test;
7857  rtx insn = loop->loop_end;
7858
7859  if (TARGET_DBNZ
7860      && (loop->length && (loop->length <= ARC_MAX_LOOP_LENGTH))
7861      && REG_P (loop->iter_reg))
7862    {
7863      /* TARGET_V2 core3 has dbnz instructions.  */
7864      test = gen_dbnz (loop->iter_reg, loop->start_label);
7865      insn = emit_jump_insn_before (test, loop->loop_end);
7866    }
7867  else if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg) == LP_COUNT))
7868    {
7869      /* We have the lp_count as loop iterator, try to use it.  */
7870      emit_insn_before (gen_loop_fail (), loop->loop_end);
7871      test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG),
7872			 const0_rtx);
7873      test = gen_rtx_IF_THEN_ELSE (VOIDmode, test,
7874				   gen_rtx_LABEL_REF (Pmode, loop->start_label),
7875				   pc_rtx);
7876      insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test),
7877				     loop->loop_end);
7878    }
7879  else
7880    {
7881      emit_insn_before (gen_addsi3 (loop->iter_reg,
7882				    loop->iter_reg,
7883				    constm1_rtx),
7884			loop->loop_end);
7885      test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
7886      insn = emit_jump_insn_before (gen_cbranchsi4 (test,
7887						    loop->iter_reg,
7888						    const0_rtx,
7889						    loop->start_label),
7890				    loop->loop_end);
7891    }
7892  JUMP_LABEL (insn) = loop->start_label;
7893  LABEL_NUSES (loop->start_label)++;
7894  delete_insn (loop->loop_end);
7895}
7896
7897/* Return the next insn after INSN that is not a NOTE, but stop the
7898   search before we enter another basic block.  This routine does not
7899   look inside SEQUENCEs.  */
7900
7901static rtx_insn *
7902next_nonnote_insn_bb (rtx_insn *insn)
7903{
7904  while (insn)
7905    {
7906      insn = NEXT_INSN (insn);
7907      if (insn == 0 || !NOTE_P (insn))
7908	break;
7909      if (NOTE_INSN_BASIC_BLOCK_P (insn))
7910	return NULL;
7911    }
7912
7913  return insn;
7914}
7915
7916/* Optimize LOOP.  */
7917
7918static bool
7919hwloop_optimize (hwloop_info loop)
7920{
7921  int i;
7922  edge entry_edge;
7923  basic_block entry_bb, bb;
7924  rtx iter_reg;
7925  rtx_insn *insn, *seq, *entry_after, *last_insn, *end_label;
7926  unsigned int length;
7927  bool need_fix = false;
7928  rtx lp_reg = gen_rtx_REG (SImode, LP_COUNT);
7929
7930  if (loop->depth > 1)
7931    {
7932      if (dump_file)
7933	fprintf (dump_file, ";; loop %d is not innermost\n",
7934		 loop->loop_no);
7935      return false;
7936    }
7937
7938  if (!loop->incoming_dest)
7939    {
7940      if (dump_file)
7941	fprintf (dump_file, ";; loop %d has more than one entry\n",
7942		 loop->loop_no);
7943      return false;
7944    }
7945
7946  if (loop->incoming_dest != loop->head)
7947    {
7948      if (dump_file)
7949	fprintf (dump_file, ";; loop %d is not entered from head\n",
7950		 loop->loop_no);
7951      return false;
7952    }
7953
7954  if (loop->has_call || loop->has_asm)
7955    {
7956      if (dump_file)
7957	fprintf (dump_file, ";; loop %d has invalid insn\n",
7958		 loop->loop_no);
7959      return false;
7960    }
7961
7962  /* Scan all the blocks to make sure they don't use iter_reg.  */
7963  if (loop->iter_reg_used || loop->iter_reg_used_outside)
7964    {
7965      if (dump_file)
7966	fprintf (dump_file, ";; loop %d uses iterator\n",
7967		 loop->loop_no);
7968      return false;
7969    }
7970
7971  /* Check if start_label appears before doloop_end.  */
7972  length = 0;
7973  for (insn = loop->start_label;
7974       insn && insn != loop->loop_end;
7975       insn = NEXT_INSN (insn))
7976    {
7977      length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0;
7978      if (JUMP_TABLES_IN_TEXT_SECTION
7979	  && JUMP_TABLE_DATA_P (insn))
7980	{
7981	  if (dump_file)
7982	    fprintf (dump_file, ";; loop %d has a jump table\n",
7983		     loop->loop_no);
7984	  return false;
7985	}
7986    }
7987
7988  if (!insn)
7989    {
7990      if (dump_file)
7991	fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
7992		 loop->loop_no);
7993      return false;
7994    }
7995
7996  loop->length = length;
7997  if (loop->length > ARC_MAX_LOOP_LENGTH)
7998    {
7999      if (dump_file)
8000	fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
8001      return false;
8002    }
8003  else if (!loop->length)
8004    {
8005      if (dump_file)
8006	fprintf (dump_file, ";; loop %d is empty\n", loop->loop_no);
8007      return false;
8008    }
8009
8010  /* Check that the loop iterator is a register and not e.g. a MEM.  */
8011  if (!REG_P (loop->iter_reg))
8012    {
8013      if (dump_file)
8014	fprintf (dump_file, ";; loop %d iterator is MEM\n",
8015		 loop->loop_no);
8016      return false;
8017    }
8018
8028  /* Check if loop register is lpcount.  */
8029  if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT)
8030    {
8031      if (dump_file)
8032        fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
8033		 " iterator\n",
8034                 loop->loop_no);
8035      /* This loop doesn't use the lp_count, check though if we can
8036	 fix it.  */
8037      if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT)
8038	  /* In rare cases we may have LP_COUNT live here.  */
8039	  || (loop->incoming_src
8040	      && REGNO_REG_SET_P (df_get_live_out (loop->incoming_src),
8041				  LP_COUNT)))
8042	{
8043	  if (dump_file)
8044	    fprintf (dump_file, ";; loop %d, lp_count is alive\n", loop->loop_no);
8045	  return false;
8046	}
8047      else
8048	need_fix = true;
8049    }
8050
8051  /* Check for a control-like instruction as the last instruction of a
8052     ZOL.  */
8053  bb = loop->tail;
8054  last_insn = PREV_INSN (loop->loop_end);
8055
8056  while (1)
8057    {
8058      for (; last_insn != BB_HEAD (bb);
8059	   last_insn = PREV_INSN (last_insn))
8060	if (NONDEBUG_INSN_P (last_insn))
8061	  break;
8062
8063      if (last_insn != BB_HEAD (bb))
8064	break;
8065
8066      if (single_pred_p (bb)
8067	  && single_pred_edge (bb)->flags & EDGE_FALLTHRU
8068	  && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun))
8069	{
8070	  bb = single_pred (bb);
8071	  last_insn = BB_END (bb);
8072	  continue;
8073	}
8074      else
8075	{
8076	  last_insn = NULL;
8077	  break;
8078	}
8079    }
8080
8081  if (!last_insn)
8082    {
8083      if (dump_file)
8084	fprintf (dump_file, ";; loop %d has no last instruction\n",
8085		 loop->loop_no);
8086      return false;
8087    }
8088
8089  if ((TARGET_ARC600_FAMILY || TARGET_HS)
8090      && INSN_P (last_insn)
8091      && (JUMP_P (last_insn) || CALL_P (last_insn)
8092	  || GET_CODE (PATTERN (last_insn)) == SEQUENCE
8093	  /* At this stage we can have (insn (clobber (mem:BLK
8094	     (reg)))) instructions, ignore them.  */
8095	  || (GET_CODE (PATTERN (last_insn)) != CLOBBER
8096	      && (get_attr_type (last_insn) == TYPE_BRCC
8097		  || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT))))
8098    {
8099      if (loop->length + 2 > ARC_MAX_LOOP_LENGTH)
8100	{
8101	  if (dump_file)
8102	    fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
8103	  return false;
8104	}
8105      if (dump_file)
8106	fprintf (dump_file, ";; loop %d has a control like last insn; "
8107		 "add a nop\n",
8108		 loop->loop_no);
8109
8110      last_insn = emit_insn_after (gen_nopv (), last_insn);
8111    }
8112
8113  if (LABEL_P (last_insn))
8114    {
8115      if (dump_file)
8116	fprintf (dump_file, ";; loop %d has a label as last insn; "
8117		 "add a nop\n",
8118		 loop->loop_no);
8119      last_insn = emit_insn_after (gen_nopv (), last_insn);
8120    }
8121
8122  /* REG_SAVE_NOTE is used by the Haifa scheduler.  However, we run
8123     after it, so we can use it to indicate that the last ZOL
8124     instruction cannot be part of a delay slot.  */
8125  add_reg_note (last_insn, REG_SAVE_NOTE, GEN_INT (2));
8126
8127  loop->last_insn = last_insn;
8128
8129  /* Get the loop iteration register.  */
8130  iter_reg = loop->iter_reg;
8131
8132  gcc_assert (REG_P (iter_reg));
8133
8134  entry_edge = NULL;
8135
8136  FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
8137    if (entry_edge->flags & EDGE_FALLTHRU)
8138      break;
8139
8140  if (entry_edge == NULL)
8141    {
8142      if (dump_file)
8143	fprintf (dump_file, ";; loop %d has no fallthru edge jumping "
8144		 "into the loop\n",
8145		 loop->loop_no);
8146      return false;
8147    }
8148  /* The loop is good.  */
8149  end_label = gen_label_rtx ();
8150  loop->end_label = end_label;
8151
8152  /* Place the zero_cost_loop_start instruction before the loop.  */
8153  entry_bb = entry_edge->src;
8154
8155  start_sequence ();
8156
8157  if (need_fix)
8158    {
8159      /* The loop uses a R-register, but the lp_count is free, thus
8160	 use lp_count.  */
8161      emit_insn (gen_rtx_SET (lp_reg, iter_reg));
8162      SET_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT);
8163      iter_reg = lp_reg;
8164      if (dump_file)
8165	{
8166	  fprintf (dump_file, ";; fix loop %d to use lp_count\n",
8167		   loop->loop_no);
8168	}
8169    }
8170
8171  insn = emit_insn (gen_arc_lp (loop->start_label,
8172				loop->end_label));
8173
8174  seq = get_insns ();
8175  end_sequence ();
8176
8177  entry_after = BB_END (entry_bb);
8178  if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1
8179      || !entry_after)
8180    {
8181      basic_block new_bb;
8182      edge e;
8183      edge_iterator ei;
8184
8185      emit_insn_before (seq, BB_HEAD (loop->head));
8186      seq = emit_label_before (gen_label_rtx (), seq);
8187      new_bb = create_basic_block (seq, insn, entry_bb);
8188      FOR_EACH_EDGE (e, ei, loop->incoming)
8189	{
8190	  if (!(e->flags & EDGE_FALLTHRU))
8191	    redirect_edge_and_branch_force (e, new_bb);
8192	  else
8193	    redirect_edge_succ (e, new_bb);
8194	}
8195
8196      make_edge (new_bb, loop->head, 0);
8197    }
8198  else
8199    {
8200#if 0
8201      while (DEBUG_INSN_P (entry_after)
8202	     || (NOTE_P (entry_after)
8203		 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK
8204		 /* Make sure we don't split a call and its corresponding
8205		    CALL_ARG_LOCATION note.  */
8206		 && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
8207        entry_after = NEXT_INSN (entry_after);
8208#endif
8209      entry_after = next_nonnote_insn_bb (entry_after);
8210
8211      gcc_assert (entry_after);
8212      emit_insn_before (seq, entry_after);
8213    }
8214
8215  /* Insert the loop end label before the last instruction of the
8216     loop.  */
8217  emit_label_after (end_label, loop->last_insn);
8218  /* Make sure we mark the beginning and end label as used.  */
8219  LABEL_NUSES (loop->end_label)++;
8220  LABEL_NUSES (loop->start_label)++;
8221
8222  return true;
8223}
8224
8225/* A callback for the hw-doloop pass.  This function examines INSN; if
8226   it is a loop_end pattern we recognize, return the reg rtx for the
8227   loop counter.  Otherwise, return NULL_RTX.  */
8228
8229static rtx
8230hwloop_pattern_reg (rtx_insn *insn)
8231{
8232  rtx reg;
8233
8234  if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
8235    return NULL_RTX;
8236
8237  reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
8238  if (!REG_P (reg))
8239    return NULL_RTX;
8240  return reg;
8241}
8242
8243static struct hw_doloop_hooks arc_doloop_hooks =
8244{
8245  hwloop_pattern_reg,
8246  hwloop_optimize,
8247  hwloop_fail
8248};
8249
8250/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
8251   and tries to rewrite the RTL of these loops so that proper ARC
8252   hardware loops are generated.  */
8253
8254static void
8255arc_reorg_loops (void)
8256{
8257  reorg_loops (true, &arc_doloop_hooks);
8258}
8259
8260/* Scan all calls and add symbols to be emitted in the jli section if
8261   needed.  */
8262
8263static void
8264jli_call_scan (void)
8265{
8266  rtx_insn *insn;
8267
8268  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8269    {
8270      if (!CALL_P (insn))
8271	continue;
8272
8273      rtx pat = PATTERN (insn);
8274      if (GET_CODE (pat) == COND_EXEC)
8275	pat = COND_EXEC_CODE (pat);
8276      pat =  XVECEXP (pat, 0, 0);
8277      if (GET_CODE (pat) == SET)
8278	pat = SET_SRC (pat);
8279
8280      pat = XEXP (XEXP (pat, 0), 0);
8281      if (GET_CODE (pat) == SYMBOL_REF
8282	  && arc_is_jli_call_p (pat))
8283	arc_add_jli_section (pat);
8284    }
8285}
8286
8287/* Add padding if necessary to avoid a mispredict.  A return could
8288   happen immediately after the function start.  A call/return and
8289   return/return must be 6 bytes apart to avoid mispredict.  */
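/* Concretely, when the instructions leading up to a return add up to only
   2 or 4 bytes, one or two nops are emitted in front of the return, and a
   blockage insn is added so that delay slot scheduling cannot undo the
   padding; a return at the very start of a function gets two nops.  */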
8290
8291static void
8292pad_return (void)
8293{
8294  rtx_insn *insn;
8295  long offset;
8296
8297  if (!TARGET_PAD_RETURN)
8298    return;
8299
8300  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8301    {
8302      rtx_insn *prev0 = prev_active_insn (insn);
8303      bool wantlong = false;
8304
8305      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
8306	continue;
8307
8308      if (!prev0)
8309	{
8310	  prev0 = emit_insn_before (gen_nopv (), insn);
8311	  /* REG_SAVE_NOTE is used by the Haifa scheduler; we are in reorg,
8312	     so it is safe to reuse it for forcing a particular length
8313	     for an instruction.  */
8314	  add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
8315	  emit_insn_before (gen_nopv (), insn);
8316	  continue;
8317	}
8318      offset = get_attr_length (prev0);
8319
8320      if (get_attr_length (prev0) == 2
8321	  && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
8322	{
8323	  /* Force long version of the insn.  */
8324	  wantlong = true;
8325	  offset += 2;
8326	}
8327
8328      rtx_insn *prev = prev_active_insn (prev0);
8329      if (prev)
8330	offset += get_attr_length (prev);
8331
8332      prev = prev_active_insn (prev);
8333      if (prev)
8334	offset += get_attr_length (prev);
8335
8336      switch (offset)
8337	{
8338	case 2:
8339	  prev = emit_insn_before (gen_nopv (), insn);
8340	  add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
8341	  break;
8342	case 4:
8343	  emit_insn_before (gen_nopv (), insn);
8344	  break;
8345	default:
8346	  continue;
8347	}
8348
8349      if (wantlong)
8350	add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
8351
8352      /* Emit a blockage to avoid delay slot scheduling.  */
8353      emit_insn_before (gen_blockage (), insn);
8354    }
8355}
8356
8357static int arc_reorg_in_progress = 0;
8358
8359/* ARC's machine specific reorg function.  */
8360
8361static void
8362arc_reorg (void)
8363{
8364  rtx_insn *insn;
8365  rtx pattern;
8366  rtx pc_target;
8367  long offset;
8368  int changed;
8369
8370  cfun->machine->arc_reorg_started = 1;
8371  arc_reorg_in_progress = 1;
8372
8373  compute_bb_for_insn ();
8374
8375  df_analyze ();
8376
8377  /* Doloop optimization.  */
8378  arc_reorg_loops ();
8379
8380  workaround_arc_anomaly ();
8381  jli_call_scan ();
8382  pad_return ();
8383
8384/* FIXME: should anticipate ccfsm action, generate special patterns for
8385   to-be-deleted branches that have no delay slot and have at least the
8386   length of the size increase forced on other insns that are conditionalized.
8387   This can also have an insn_list inside that enumerates insns which are
8388   not actually conditionalized because the destinations are dead in the
8389   not-execute case.
8390   Could also tag branches that we want to be unaligned if they get no delay
8391   slot, or even ones that we don't want to do delay slot scheduling for
8392   because we can unalign them.
8393
8394   However, there are cases when conditional execution is only possible after
8395   delay slot scheduling:
8396
8397   - If a delay slot is filled with a nocond/set insn from above, the previous
8398     basic block can become eligible for conditional execution.
8399   - If a delay slot is filled with a nocond insn from the fall-through path,
8400     the branch with that delay slot can become eligible for conditional
8401     execution (however, with the same sort of data flow analysis that dbr
8402     does, we could have figured out before that we don't need to
8403     conditionalize this insn.)
8404     - If a delay slot insn is filled with an insn from the target, the
8405       target label gets its uses decremented (even deleted if falling to zero),
8406   thus possibly creating more condexec opportunities there.
8407   Therefore, we should still be prepared to apply condexec optimization on
8408   non-prepared branches if the size increase of conditionalized insns is no
8409   more than the size saved from eliminating the branch.  An invocation option
8410   could also be used to reserve a bit of extra size for condbranches so that
8411   this'll work more often (could also test in arc_reorg if the block is
8412   'close enough' to be eligible for condexec to make this likely, and
8413   estimate required size increase).  */
8414  /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
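  /* For instance, a compare setting the condition codes followed by a
     "beq @label" within BRcc range is rewritten into a single BRcc
     compare-and-branch insn (e.g. "breq"), and a "btst" / "b{eq,ne}" pair
     becomes a "bbit0"/"bbit1" insn, provided neither compare operand is
     modified between the compare and the branch, the condition codes are
     not set or used in between, and CC is dead after the branch.  */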
8415  if (TARGET_NO_BRCC_SET)
8416    return;
8417
8418  do
8419    {
8420      init_insn_lengths();
8421      changed = 0;
8422
8423      if (optimize > 1 && !TARGET_NO_COND_EXEC)
8424	{
8425	  arc_ifcvt ();
8426	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
8427	  df_finish_pass ((flags & TODO_df_verify) != 0);
8428
8429	  if (dump_file)
8430	    {
8431	      fprintf (dump_file, ";; After if conversion:\n\n");
8432	      print_rtl (dump_file, get_insns ());
8433	    }
8434	}
8435
8436      /* Call shorten_branches to calculate the insn lengths.  */
8437      shorten_branches (get_insns());
8438      cfun->machine->ccfsm_current_insn = NULL_RTX;
8439
8440      if (!INSN_ADDRESSES_SET_P())
8441	  fatal_error (input_location,
8442		       "insn addresses not set after shorten_branches");
8443
8444      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8445	{
8446	  rtx label;
8447	  enum attr_type insn_type;
8448
8449	  /* If this is a non-jump insn (or a casesi jump table), continue.  */
8450	  if (GET_CODE (insn) != JUMP_INSN
8451	      || GET_CODE (PATTERN (insn)) == ADDR_VEC
8452	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
8453	    continue;
8454
8455	  /* If we already have a brcc, note if it is suitable for brcc_s.
8456	     Be a bit generous with the brcc_s range so that we can take
8457	     advantage of any code shortening from delay slot scheduling.  */
8458	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
8459	    {
8460	      rtx pat = PATTERN (insn);
8461	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
8462	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
8463
8464	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
8465	      if ((offset >= -140 && offset < 140)
8466		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
8467		  && compact_register_operand (XEXP (op, 0), VOIDmode)
8468		  && equality_comparison_operator (op, VOIDmode))
8469		PUT_MODE (*ccp, CC_Zmode);
8470	      else if (GET_MODE (*ccp) == CC_Zmode)
8471		PUT_MODE (*ccp, CC_ZNmode);
8472	      continue;
8473	    }
8474	  if ((insn_type =  get_attr_type (insn)) == TYPE_BRCC
8475	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
8476	    continue;
8477
8478	  /* OK. so we have a jump insn.  */
8479	  /* We need to check that it is a bcc.  */
8480	  /* Bcc => set (pc) (if_then_else ) */
8481	  pattern = PATTERN (insn);
8482	  if (GET_CODE (pattern) != SET
8483	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
8484	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
8485	    continue;
8486
8487	  /* Now check if the jump is beyond the s9 range.  */
8488	  if (CROSSING_JUMP_P (insn))
8489	    continue;
8490	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
8491
8492	  if (offset > 253 || offset < -254)
8493	    continue;
8494
8495	  pc_target = SET_SRC (pattern);
8496
8497	  /* Avoid FPU instructions.  */
8498	  if ((GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUmode)
8499	      || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUEmode)
8500	      || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPU_UNEQmode))
8501	    continue;
8502
8503	  /* Now go back and search for the set cc insn.  */
8504
8505	  label = XEXP (pc_target, 1);
8506
8507	    {
8508	      rtx pat;
8509	      rtx_insn *scan, *link_insn = NULL;
8510
8511	      for (scan = PREV_INSN (insn);
8512		   scan && GET_CODE (scan) != CODE_LABEL;
8513		   scan = PREV_INSN (scan))
8514		{
8515		  if (! INSN_P (scan))
8516		    continue;
8517		  pat = PATTERN (scan);
8518		  if (GET_CODE (pat) == SET
8519		      && cc_register (SET_DEST (pat), VOIDmode))
8520		    {
8521		      link_insn = scan;
8522		      break;
8523		    }
8524		}
8525	      if (!link_insn)
8526		continue;
8527	      else
8528		{
8529		  /* Check if this is a data dependency.  */
8530		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
8531		  rtx cmp0, cmp1;
8532
8533		  /* Make sure we can use it for brcc insns.  */
8534		  if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
8535		    continue;
8536
8537		  /* OK, this is the set cc insn; copy the args here.  */
8538		  op = XEXP (pc_target, 0);
8539
8540		  op0 = cmp0 = XEXP (SET_SRC (pat), 0);
8541		  op1 = cmp1 = XEXP (SET_SRC (pat), 1);
8542		  if (GET_CODE (op0) == ZERO_EXTRACT
8543		      && XEXP (op0, 1) == const1_rtx
8544		      && (GET_CODE (op) == EQ
8545			  || GET_CODE (op) == NE))
8546		    {
8547		      /* btst / b{eq,ne} -> bbit{0,1} */
8548		      op0 = XEXP (cmp0, 0);
8549		      op1 = XEXP (cmp0, 2);
8550		    }
8551		  else if (!register_operand (op0, VOIDmode)
8552			  || !general_operand (op1, VOIDmode))
8553		    continue;
8554		  /* Be careful not to break what cmpsfpx_raw is
8555		     trying to create for checking equality of
8556		     single-precision floats.  */
8557		  else if (TARGET_SPFP
8558			   && GET_MODE (op0) == SFmode
8559			   && GET_MODE (op1) == SFmode)
8560		    continue;
8561
8562		  /* None of the two cmp operands should be set between the
8563		     cmp and the branch.  */
8564		  if (reg_set_between_p (op0, link_insn, insn))
8565		    continue;
8566
8567		  if (reg_set_between_p (op1, link_insn, insn))
8568		    continue;
8569
8570		  /* Since the MODE check does not work, check that this is
8571		     CC reg's last set location before insn, and also no
8572		     instruction between the cmp and branch uses the
8573		     condition codes.  */
8574		  if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
8575		      || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
8576		    continue;
8577
8578		  /* CC reg should be dead after insn.  */
8579		  if (!find_regno_note (insn, REG_DEAD, CC_REG))
8580		    continue;
8581
8582		  op = gen_rtx_fmt_ee (GET_CODE (op),
8583				       GET_MODE (op), cmp0, cmp1);
8584		  /* If we create a LIMM where there was none before,
8585		     we only benefit if we can avoid a scheduling bubble
8586		     for the ARC600.  Otherwise, we'd only forgo chances
8587		     at short insn generation, and risk out-of-range
8588		     branches.  */
8589		  if (!brcc_nolimm_operator (op, VOIDmode)
8590		      && !long_immediate_operand (op1, VOIDmode)
8591		      && (TARGET_ARC700
8592			  || next_active_insn (link_insn) != insn))
8593		    continue;
8594
8595		  /* Emit bbit / brcc (or brcc_s if possible).
8596		     CC_Zmode indicates that brcc_s is possible.  */
8597
8598		  if (op0 != cmp0)
8599		    cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
8600		  else if ((offset >= -140 && offset < 140)
8601			   && rtx_equal_p (op1, const0_rtx)
8602			   && compact_register_operand (op0, VOIDmode)
8603			   && (GET_CODE (op) == EQ
8604			       || GET_CODE (op) == NE))
8605		    cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
8606		  else
8607		    cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
8608
8609		  brcc_insn
8610		    = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
8611		  brcc_insn = gen_rtx_SET (pc_rtx, brcc_insn);
8612		  cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
8613		  brcc_insn
8614		    = gen_rtx_PARALLEL
8615			(VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
8616		  brcc_insn = emit_jump_insn_before (brcc_insn, insn);
8617
8618		  JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
8619		  note = find_reg_note (insn, REG_BR_PROB, 0);
8620		  if (note)
8621		    {
8622		      XEXP (note, 1) = REG_NOTES (brcc_insn);
8623		      REG_NOTES (brcc_insn) = note;
8624		    }
8625		  note = find_reg_note (link_insn, REG_DEAD, op0);
8626		  if (note)
8627		    {
8628		      remove_note (link_insn, note);
8629		      XEXP (note, 1) = REG_NOTES (brcc_insn);
8630		      REG_NOTES (brcc_insn) = note;
8631		    }
8632		  note = find_reg_note (link_insn, REG_DEAD, op1);
8633		  if (note)
8634		    {
8635		      XEXP (note, 1) = REG_NOTES (brcc_insn);
8636		      REG_NOTES (brcc_insn) = note;
8637		    }
8638
8639		  changed = 1;
8640
8641		  /* Delete the bcc insn.  */
8642		  set_insn_deleted (insn);
8643
8644		  /* Delete the cmp insn.  */
8645		  set_insn_deleted (link_insn);
8646
8647		}
8648	    }
8649	}
8650      /* Clear out insn_addresses.  */
8651      INSN_ADDRESSES_FREE ();
8652
8653    } while (changed);
8654
  if (INSN_ADDRESSES_SET_P ())
8656    fatal_error (input_location, "insn addresses not freed");
8657
8658  arc_reorg_in_progress = 0;
8659}
8660
/* Check if the operands are valid for BRcc.d generation.
   Valid BRcc.d patterns are
	BRcc.d b, c, s9
	BRcc.d b, u6, s9

   For cc={GT, LE, GTU, LEU}, u6=63 is not allowed, since the assembler
   encodes it as {GE, LT, HS, LS} 64, which does not have a delay slot.

   Assumed precondition: the second operand is either a register or a u6
   value.  */
8671
8672bool
8673valid_brcc_with_delay_p (rtx *operands)
8674{
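  /* A CC_Zmode comparison operand indicates that the short brcc_s encoding
     is possible; when optimizing for size that form is presumably preferred,
     so do not accept the delay-slot variant in that case.  */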
8675  if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
8676    return false;
8677  return brcc_nolimm_operator (operands[0], VOIDmode);
8678}
8679
8680/* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
8681   access DECL using %gp_rel(...)($gp).  */
8682
8683static bool
8684arc_in_small_data_p (const_tree decl)
8685{
8686  HOST_WIDE_INT size;
8687  tree attr;
8688
  /* Only variables go into the small data area.  */
8690  if (TREE_CODE (decl) != VAR_DECL)
8691    return false;
8692
8693  if (TARGET_NO_SDATA_SET)
8694    return false;
8695
8696  /* Disable sdata references to weak variables.  */
8697  if (DECL_WEAK (decl))
8698    return false;
8699
8700  /* Don't put constants into the small data section: we want them to
8701     be in ROM rather than RAM.  */
8702  if (TREE_READONLY (decl))
8703    return false;
8704
  /* ld.di does not have a gp-relative variant, so volatile data must
     stay out of sdata for -mno-volatile-cache to work.  */
8707  if (!TARGET_VOLATILE_CACHE_SET
8708      && TREE_THIS_VOLATILE (decl))
8709    return false;
8710
8711  /* Likewise for uncached data.  */
8712  attr = TYPE_ATTRIBUTES (TREE_TYPE (decl));
8713  if (lookup_attribute ("uncached", attr))
8714    return false;
8715
  /* Likewise for aux regs.  */
8717  attr = DECL_ATTRIBUTES (decl);
8718  if (lookup_attribute ("aux", attr))
8719    return false;
8720
8721  if (DECL_SECTION_NAME (decl) != 0)
8722    {
8723      const char *name = DECL_SECTION_NAME (decl);
8724      if (strcmp (name, ".sdata") == 0
8725	  || strcmp (name, ".sbss") == 0)
8726	return true;
8727    }
8728  /* If it's not public, there's no need to put it in the small data
8729     section.  */
8730  else if (TREE_PUBLIC (decl))
8731    {
8732      size = int_size_in_bytes (TREE_TYPE (decl));
8733      return (size > 0 && size <= g_switch_value);
8734    }
8735  return false;
8736}
8737
/* Return true if OP is an acceptable memory operand for ARCompact
   16-bit gp-relative load instructions.
   The volatile cache option is still to be handled.  */
8742
8743bool
8744compact_sda_memory_operand (rtx op, machine_mode mode, bool short_p)
8745{
8746  rtx addr;
8747  int size;
8748  int align = 0;
8749  int mask = 0;
8750
8751  /* Eliminate non-memory operations.  */
8752  if (GET_CODE (op) != MEM)
8753    return false;
8754
8755  if (mode == VOIDmode)
8756    mode = GET_MODE (op);
8757
8758  size = GET_MODE_SIZE (mode);
8759
8760  /* dword operations really put out 2 instructions, so eliminate them.  */
8761  if (size > UNITS_PER_WORD)
8762    return false;
8763
8764  /* Decode the address now.  */
8765  addr = XEXP (op, 0);
8766
8767  if (!legitimate_small_data_address_p (addr, mode))
8768    return false;
8769
8770  if (!short_p || size == 1)
8771    return true;
8772
8773  /* Now check for the alignment, the short loads using gp require the
8774     addresses to be aligned.  */
8775  align = get_symbol_alignment (addr);
8776  switch (mode)
8777    {
8778    case E_HImode:
8779      mask = 1;
8780      break;
8781    default:
8782      mask = 3;
8783      break;
8784    }
8785
8786  if (align && ((align & mask) == 0))
8787    return true;
8788  return false;
8789}
8790
8791/* Return TRUE if PAT is accessing an aux-reg.  */
8792
8793static bool
8794arc_is_aux_reg_p (rtx pat)
8795{
8796  tree attrs = NULL_TREE;
8797  tree addr;
8798
8799  if (!MEM_P (pat))
8800    return false;
8801
8802  /* Get the memory attributes.  */
8803  addr = MEM_EXPR (pat);
8804  if (!addr)
8805    return false;
8806
8807  /* Get the attributes.  */
8808  if (TREE_CODE (addr) == VAR_DECL)
8809    attrs = DECL_ATTRIBUTES (addr);
8810  else if (TREE_CODE (addr) == MEM_REF)
8811    attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
8812  else
8813    return false;
8814
8815  if (lookup_attribute ("aux", attrs))
8816    return true;
8817  return false;
8818}
8819
8820/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
8821
8822void
8823arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
8824				   unsigned HOST_WIDE_INT size,
8825				   unsigned HOST_WIDE_INT align,
8826				   unsigned HOST_WIDE_INT globalize_p)
8827{
8828  int in_small_data = arc_in_small_data_p (decl);
8829  rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl);
8830
8831  /* Don't output aux-reg symbols.  */
8832  if (mem != NULL_RTX && MEM_P (mem)
8833      && SYMBOL_REF_P (XEXP (mem, 0))
8834      && arc_is_aux_reg_p (mem))
8835    return;
8836
8837  if (in_small_data)
8838    switch_to_section (get_named_section (NULL, ".sbss", 0));
8840  else
8841    switch_to_section (bss_section);
8842
8843  if (globalize_p)
8844    (*targetm.asm_out.globalize_label) (stream, name);
8845
8846  ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
8847  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8848  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8849  ASM_OUTPUT_LABEL (stream, name);
8850
8851  if (size != 0)
8852    ASM_OUTPUT_SKIP (stream, size);
8853}
8854
8855static bool
8856arc_preserve_reload_p (rtx in)
8857{
8858  return (GET_CODE (in) == PLUS
8859	  && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
8860	  && CONST_INT_P (XEXP (in, 1))
8861	  && !((INTVAL (XEXP (in, 1)) & 511)));
8862}
8863
8864/* Implement TARGET_REGISTER_MOVE_COST.  */
8865
8866static int
8867arc_register_move_cost (machine_mode,
8868			reg_class_t from_class, reg_class_t to_class)
8869{
8870  /* Force an attempt to 'mov Dy,Dx' to spill.  */
8871  if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP
8872      && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
8873    return 100;
8874
8875  return 2;
8876}
8877
8878/* Emit code for an addsi3 instruction with OPERANDS.
8879   COND_P indicates if this will use conditional execution.
8880   Return the length of the instruction.
8881   If OUTPUT_P is false, don't actually output the instruction, just return
8882   its length.  */
8883int
8884arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
8885{
8886  char format[35];
8887
8888  int match = operands_match_p (operands[0], operands[1]);
8889  int match2 = operands_match_p (operands[0], operands[2]);
8890  int intval = (REG_P (operands[2]) ? 1
8891		: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
8892  int neg_intval = -intval;
8893  int short_0 = satisfies_constraint_Rcq (operands[0]);
8894  int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
8895  int ret = 0;
8896
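/* REG_H_P matches the registers that can appear as the 'h' operand of a
   16-bit insn: on ARCv2 (TARGET_V2) r0-r31 except r30, while on other
   cores any register is accepted.  */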
8897#define REG_H_P(OP) (REG_P (OP) && ((TARGET_V2 && REGNO (OP) <= 31	\
8898				     && REGNO (OP) != 30)		\
8899				    || !TARGET_V2))
8900
8901#define ADDSI_OUTPUT1(FORMAT) do {\
8902  if (output_p) \
8903    output_asm_insn (FORMAT, operands);\
8904  return ret; \
8905} while (0)
8906#define ADDSI_OUTPUT(LIST) do {\
8907  if (output_p) \
8908    sprintf LIST;\
8909  ADDSI_OUTPUT1 (format);\
8910  return ret; \
8911} while (0)
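/* Each ADDSI_OUTPUT* use below emits its template (only when OUTPUT_P) and
   returns the length accumulated in RET; when none of the guards in a group
   match, control falls through to try the next, longer encoding.  */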
8912
8913  /* First try to emit a 16 bit insn.  */
8914  ret = 2;
8915  if (!cond_p
      /* If we are actually about to output this insn, don't try a 16 bit
	 variant if we already decided that we don't want that
	 (i.e. we upsized this insn to align some following insn).
	 E.g. add_s r0,sp,70 is 16 bit, while add r0,sp,70 requires a LIMM
	 and add1 r0,sp,35 doesn't.  */
8921      && (!output_p || (get_attr_length (current_output_insn) & 2)))
8922    {
8923      /* Generate add_s a,b,c; add_s b,b,u7; add_s c,b,u3; add_s b,b,h
8924	 patterns.  */
8925      if (short_p
8926	  && ((REG_H_P (operands[2])
8927	       && (match || satisfies_constraint_Rcq (operands[2])))
8928	      || (CONST_INT_P (operands[2])
8929		  && ((unsigned) intval <= (match ? 127 : 7)))))
8930	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;1");
8931
8932      /* Generate add_s b,b,h patterns.  */
8933      if (short_0 && match2 && REG_H_P (operands[1]))
8934	ADDSI_OUTPUT1 ("add%? %0,%2,%1 ;2");
8935
8936      /* Generate add_s b,sp,u7; add_s sp,sp,u7 patterns.  */
8937      if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
8938	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
8939	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;3");
8940
8941      if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
8942	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
8943	      && match && !(neg_intval & ~124)))
8944	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2 ;4");
8945
8946      /* Generate add_s h,h,s3 patterns.  */
8947      if (REG_H_P (operands[0]) && match && TARGET_V2
8948	  && CONST_INT_P (operands[2]) && ((intval>= -1) && (intval <= 6)))
8949	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;5");
8950
8951      /* Generate add_s r0,b,u6; add_s r1,b,u6 patterns.  */
8952      if (TARGET_CODE_DENSITY && REG_P (operands[0]) && REG_P (operands[1])
8953	  && ((REGNO (operands[0]) == 0) || (REGNO (operands[0]) == 1))
8954	  && satisfies_constraint_Rcq (operands[1])
8955	  && satisfies_constraint_L (operands[2]))
8956	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;6");
8957    }
8958
8959  /* Now try to emit a 32 bit insn without long immediate.  */
8960  ret = 4;
8961  if (!match && match2 && REG_P (operands[1]))
8962    ADDSI_OUTPUT1 ("add%? %0,%2,%1");
8963  if (match || !cond_p)
8964    {
8965      int limit = (match && !cond_p) ? 0x7ff : 0x3f;
8966      int range_factor = neg_intval & intval;
8967      int shift;
8968
8969      if (intval == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U << 31))
8970	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
8971
      /* If we can use a straight add / sub instead of a {add,sub}[123] of
	 the same size, do so - the insn latency is lower.  */
8974      /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
8975	 0x800 is not.  */
8976      if ((intval >= 0 && intval <= limit)
8977	       || (intval == -0x800 && limit == 0x7ff))
8978	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
8979      else if ((intval < 0 && neg_intval <= limit)
8980	       || (intval == 0x800 && limit == 0x7ff))
8981	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
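      /* range_factor (= neg_intval & intval) isolates the lowest set bit of
	 the constant; the shift derived from it below is how far the
	 constant can be shifted right for the add1/add2/add3 or
	 sub1/sub2/sub3 forms.  */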
8982      shift = range_factor >= 8 ? 3 : (range_factor >> 1);
8983      gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
8984      gcc_assert ((((1 << shift) - 1) & intval) == 0);
8985      if (((intval < 0 && intval != -0x4000)
8986	   /* sub[123] is slower than add_s / sub, only use it if it
8987	      avoids a long immediate.  */
8988	   && neg_intval <= limit << shift)
8989	  || (intval == 0x4000 && limit == 0x7ff))
8990	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
8991		       shift, neg_intval >> shift));
8992      else if ((intval >= 0 && intval <= limit << shift)
8993	       || (intval == -0x4000 && limit == 0x7ff))
8994	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
8995    }
8996  /* Try to emit a 16 bit opcode with long immediate.  */
8997  ret = 6;
8998  if (short_p && match)
8999    ADDSI_OUTPUT1 ("add%? %0,%1,%2");
9000
9001  /* We have to use a 32 bit opcode, and with a long immediate.  */
9002  ret = 8;
9003  ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%2");
9004}
9005
/* Emit code for a commutative_cond_exec instruction with OPERANDS.
9007   Return the length of the instruction.
9008   If OUTPUT_P is false, don't actually output the instruction, just return
9009   its length.  */
9010int
9011arc_output_commutative_cond_exec (rtx *operands, bool output_p)
9012{
9013  enum rtx_code commutative_op = GET_CODE (operands[3]);
9014  const char *pat = NULL;
9015
9016  /* Canonical rtl should not have a constant in the first operand position.  */
9017  gcc_assert (!CONSTANT_P (operands[1]));
9018
9019  switch (commutative_op)
9020    {
9021      case AND:
9022	if (satisfies_constraint_C1p (operands[2]))
9023	  pat = "bmsk%? %0,%1,%Z2";
9024	else if (satisfies_constraint_C2p (operands[2]))
9025	  {
9026	    operands[2] = GEN_INT ((~INTVAL (operands[2])));
9027	    pat = "bmskn%? %0,%1,%Z2";
9028	  }
9029	else if (satisfies_constraint_Ccp (operands[2]))
9030	  pat = "bclr%? %0,%1,%M2";
9031	else if (satisfies_constraint_CnL (operands[2]))
9032	  pat = "bic%? %0,%1,%n2-1";
9033	break;
9034      case IOR:
9035	if (satisfies_constraint_C0p (operands[2]))
9036	  pat = "bset%? %0,%1,%z2";
9037	break;
9038      case XOR:
9039	if (satisfies_constraint_C0p (operands[2]))
9040	  pat = "bxor%? %0,%1,%z2";
9041	break;
9042      case PLUS:
9043	return arc_output_addsi (operands, true, output_p);
9044      default: break;
9045    }
9046  if (output_p)
9047    output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
9048  if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
9049    return 4;
9050  return 8;
9051}
9052
/* Helper function of arc_expand_cpymem.  ADDR points to a chunk of memory.
   Emit code and return a potentially modified address such that offsets
   up to SIZE can be added to yield a legitimate address.
   If REUSE is set, ADDR is a register that may be modified.  */
9057
9058static rtx
9059force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
9060{
9061  rtx base = addr;
9062  rtx offs = const0_rtx;
9063
9064  if (GET_CODE (base) == PLUS)
9065    {
9066      offs = XEXP (base, 1);
9067      base = XEXP (base, 0);
9068    }
9069  if (!REG_P (base)
9070      || (REGNO (base) != STACK_POINTER_REGNUM
9071	  && REGNO_PTR_FRAME_P (REGNO (base)))
9072      || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
9073      || !SMALL_INT (INTVAL (offs) + size))
9074    {
9075      if (reuse)
9076	emit_insn (gen_add2_insn (addr, offs));
9077      else
9078	addr = copy_to_mode_reg (Pmode, addr);
9079    }
9080  return addr;
9081}
9082
9083/* Like move_by_pieces, but take account of load latency, and actual
9084   offset ranges.  Return true on success.  */
9085
9086bool
9087arc_expand_cpymem (rtx *operands)
9088{
9089  rtx dst = operands[0];
9090  rtx src = operands[1];
9091  rtx dst_addr, src_addr;
9092  HOST_WIDE_INT size;
9093  int align = INTVAL (operands[3]);
9094  unsigned n_pieces;
9095  int piece = align;
9096  rtx store[2];
9097  rtx tmpx[2];
9098  int i;
9099
9100  if (!CONST_INT_P (operands[2]))
9101    return false;
9102  size = INTVAL (operands[2]);
9103  /* move_by_pieces_ninsns is static, so we can't use it.  */
9104  if (align >= 4)
9105    {
9106      if (TARGET_LL64)
9107	n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
9108      else
9109	n_pieces = (size + 2) / 4U + (size & 1);
9110    }
9111  else if (align == 2)
9112    n_pieces = (size + 1) / 2U;
9113  else
9114    n_pieces = size;
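  /* N_PIECES now estimates how many load/store pairs the inline expansion
     below will emit; give up if that exceeds the size/speed threshold.  */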
9115  if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
9116    return false;
  /* Force 32 bit aligned and larger data to use 64 bit transfers, if
9118     possible.  */
9119  if (TARGET_LL64 && (piece >= 4) && (size >= 8))
9120    piece = 8;
9121  else if (piece > 4)
9122    piece = 4;
9123  dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
9124  src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
9125  store[0] = store[1] = NULL_RTX;
9126  tmpx[0] = tmpx[1] = NULL_RTX;
9127  for (i = 0; size > 0; i ^= 1, size -= piece)
9128    {
9129      rtx tmp;
9130      machine_mode mode;
9131
9132      while (piece > size)
9133	piece >>= 1;
9134      mode = smallest_int_mode_for_size (piece * BITS_PER_UNIT);
9135      /* If we don't re-use temporaries, the scheduler gets carried away,
9136	 and the register pressure gets unnecessarily high.  */
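      /* Note: the re-use is currently disabled by the leading "0 &&" in the
	 condition below, so a fresh pseudo is allocated for each piece.  */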
9137      if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
9138	tmp = tmpx[i];
9139      else
9140	tmpx[i] = tmp = gen_reg_rtx (mode);
9141      dst_addr = force_offsettable (dst_addr, piece, 1);
9142      src_addr = force_offsettable (src_addr, piece, 1);
9143      if (store[i])
9144	emit_insn (store[i]);
9145      emit_move_insn (tmp, change_address (src, mode, src_addr));
9146      store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
9147      dst_addr = plus_constant (Pmode, dst_addr, piece);
9148      src_addr = plus_constant (Pmode, src_addr, piece);
9149    }
9150  if (store[i])
9151    emit_insn (store[i]);
9152  if (store[i^1])
9153    emit_insn (store[i^1]);
9154  return true;
9155}
9156
9157static bool
9158arc_get_aux_arg (rtx pat, int *auxr)
9159{
9160  tree attr, addr = MEM_EXPR (pat);
9161  if (TREE_CODE (addr) != VAR_DECL)
9162    return false;
9163
9164  attr = DECL_ATTRIBUTES (addr);
9165  if (lookup_attribute ("aux", attr))
9166    {
9167      tree arg = TREE_VALUE (attr);
9168      if (arg)
9169	{
9170	  *auxr = TREE_INT_CST_LOW (TREE_VALUE (arg));
9171	  return true;
9172	}
9173    }
9174
9175  return false;
9176}
9177
9178/* Prepare operands for move in MODE.  Return true iff the move has
9179   been emitted.  */
9180
9181bool
9182prepare_move_operands (rtx *operands, machine_mode mode)
9183{
9184  if ((MEM_P (operands[0]) || MEM_P (operands[1]))
9185      && SCALAR_INT_MODE_P (mode))
9186    {
9187      /* First handle aux attribute.  */
9188      if (mode == SImode)
9189	{
9190	  rtx tmp;
9191	  int auxr = 0;
9192	  if (MEM_P (operands[0]) && arc_is_aux_reg_p (operands[0]))
9193	    {
9194	      /* Save operation.  */
9195	      if (arc_get_aux_arg (operands[0], &auxr))
9196		{
9197		  tmp = gen_reg_rtx (SImode);
9198		  emit_move_insn (tmp, GEN_INT (auxr));
9199		}
9200	      else
9201		tmp = XEXP (operands[0], 0);
9202
9203	      operands[1] = force_reg (SImode, operands[1]);
9204	      emit_insn (gen_rtx_UNSPEC_VOLATILE
9205			 (VOIDmode, gen_rtvec (2, operands[1], tmp),
9206			  VUNSPEC_ARC_SR));
9207	      return true;
9208	    }
9209	  if (MEM_P (operands[1]) && arc_is_aux_reg_p (operands[1]))
9210	    {
9211	      if (arc_get_aux_arg (operands[1], &auxr))
9212		{
9213		  tmp = gen_reg_rtx (SImode);
9214		  emit_move_insn (tmp, GEN_INT (auxr));
9215		}
9216	      else
9217		{
9218		  tmp = XEXP (operands[1], 0);
9219		  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9220		}
9221	      /* Load operation.  */
9222	      gcc_assert (REG_P (operands[0]));
9223	      emit_insn (gen_rtx_SET (operands[0],
9224				      gen_rtx_UNSPEC_VOLATILE
9225				      (SImode, gen_rtvec (1, tmp),
9226				       VUNSPEC_ARC_LR)));
9227	      return true;
9228	    }
9229	}
      /* Second, check for the uncached attribute.  */
9231      if (arc_is_uncached_mem_p (operands[0]))
9232	{
9233	  if (!REG_P (operands[1]))
9234	    operands[1] = force_reg (mode, operands[1]);
9235	  emit_insn (gen_rtx_UNSPEC_VOLATILE
9236		     (VOIDmode, gen_rtvec (2, operands[0], operands[1]),
9237		      VUNSPEC_ARC_STDI));
9238	  return true;
9239	}
9240      if (arc_is_uncached_mem_p (operands[1]))
9241	{
9242	  if (MEM_P (operands[0]))
9243	    operands[0] = force_reg (mode, operands[0]);
9244	  emit_insn (gen_rtx_SET
9245		     (operands[0],
9246		      gen_rtx_UNSPEC_VOLATILE
9247		      (mode, gen_rtvec (1, operands[1]),
9248		       VUNSPEC_ARC_LDDI)));
9249	  return true;
9250	}
9251    }
9252
9253  if (GET_CODE (operands[1]) == SYMBOL_REF)
9254    {
9255      enum tls_model model = SYMBOL_REF_TLS_MODEL (operands[1]);
9256      if (MEM_P (operands[0]))
9257	operands[1] = force_reg (mode, operands[1]);
9258      else if (model)
9259	operands[1] = arc_legitimize_tls_address (operands[1], model);
9260    }
9261
9262  operands[1] = arc_legitimize_pic_address (operands[1]);
9263
9264  /* Store instructions are limited, they only accept as address an
9265     immediate, a register or a register plus a small immediate.  */
9266  if (MEM_P (operands[0])
9267      && !move_dest_operand (operands[0], mode))
9268    {
9269      rtx tmp0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
9270      rtx tmp1 = change_address (operands[0], mode, tmp0);
9271      MEM_COPY_ATTRIBUTES (tmp1, operands[0]);
9272      operands[0] = tmp1;
9273    }
9274
9275  /* Check if it is constant but it is not legitimized.  */
9276  if (CONSTANT_P (operands[1])
9277      && !arc_legitimate_constant_p (mode, operands[1]))
9278    operands[1] = force_reg (mode, XEXP (operands[1], 0));
9279  else if (MEM_P (operands[0])
9280	   && ((CONSTANT_P (operands[1])
9281		&& !satisfies_constraint_Cm3 (operands[1]))
9282	       || MEM_P (operands[1])))
9283    operands[1] = force_reg (mode, operands[1]);
9284
9285  return false;
9286}
9287
9288/* Output a library call to a function called FNAME that has been arranged
   to be local to any DSO.  */
9290
9291const char *
9292arc_output_libcall (const char *fname)
9293{
9294  unsigned len = strlen (fname);
9295  static char buf[64];
9296
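  /* The 35 bytes of slack demanded here cover the longest template text
     wrapped around FNAME below, plus the terminating NUL.  */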
9297  gcc_assert (len < sizeof buf - 35);
9298  if (TARGET_LONG_CALLS_SET
9299     || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
9300    {
9301      if (flag_pic)
9302	sprintf (buf, "add r12,pcl,@%s@pcl\n\tjl%%!%%* [r12]", fname);
9303      else
9304	sprintf (buf, "jl%%! @%s", fname);
9305    }
9306  else
9307    sprintf (buf, "bl%%!%%* @%s", fname);
9308  return buf;
9309}
9310
9311/* Return the SImode highpart of the DImode value IN.  */
9312
9313rtx
9314disi_highpart (rtx in)
9315{
9316  return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
9317}
9318
9319/* Return length adjustment for INSN.
9320   For ARC600:
   A write to a core reg greater than or equal to 32 must not be immediately
9322   followed by a use.  Anticipate the length requirement to insert a nop
9323   between PRED and SUCC to prevent a hazard.  */
9324
9325static int
9326arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
9327{
9328  if (!TARGET_ARC600)
9329    return 0;
9330  if (GET_CODE (PATTERN (pred)) == SEQUENCE)
9331    pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
9332  if (GET_CODE (PATTERN (succ)) == SEQUENCE)
9333    succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
9334  if (recog_memoized (pred) == CODE_FOR_mulsi_600
9335      || recog_memoized (pred) == CODE_FOR_umul_600
9336      || recog_memoized (pred) == CODE_FOR_mac_600
9337      || recog_memoized (pred) == CODE_FOR_mul64_600
9338      || recog_memoized (pred) == CODE_FOR_mac64_600
9339      || recog_memoized (pred) == CODE_FOR_umul64_600
9340      || recog_memoized (pred) == CODE_FOR_umac64_600)
9341    return 0;
9342  subrtx_iterator::array_type array;
9343  FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
9344    {
9345      const_rtx x = *iter;
9346      switch (GET_CODE (x))
9347	{
9348	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
9349	  break;
9350	default:
9351	  /* This is also fine for PRE/POST_MODIFY, because they
9352	     contain a SET.  */
9353	  continue;
9354	}
9355      rtx dest = XEXP (x, 0);
9356      /* Check if this sets an extension register.  N.B. we use 61 for the
9357	 condition codes, which is definitely not an extension register.  */
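      /* The regno range below spans every hard register occupied by DEST,
	 at one register per 4 bytes.  */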
9358      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
9359	  /* Check if the same register is used by the PAT.  */
9360	  && (refers_to_regno_p
9361	      (REGNO (dest),
9362	       REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
9363	       PATTERN (succ), 0)))
9364	return 4;
9365    }
9366  return 0;
9367}
9368
9369/* Given a rtx, check if it is an assembly instruction or not.  */
9370
9371static int
9372arc_asm_insn_p (rtx x)
9373{
9374  int i, j;
9375
9376  if (x == 0)
9377    return 0;
9378
9379  switch (GET_CODE (x))
9380    {
9381    case ASM_OPERANDS:
9382    case ASM_INPUT:
9383      return 1;
9384
9385    case SET:
9386      return arc_asm_insn_p (SET_SRC (x));
9387
9388    case PARALLEL:
9389      j = 0;
9390      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
9391	j += arc_asm_insn_p (XVECEXP (x, 0, i));
9392      if ( j > 0)
9393	return 1;
9394      break;
9395
9396    default:
9397      break;
9398    }
9399
9400  return 0;
9401}
9402
9403/* For ARC600:
   A write to a core reg greater than or equal to 32 must not be immediately
9405   followed by a use.  Anticipate the length requirement to insert a nop
9406   between PRED and SUCC to prevent a hazard.  */
9407
9408int
9409arc_hazard (rtx_insn *pred, rtx_insn *succ)
9410{
9411  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
9412    return 0;
9413
9414  if (TARGET_ARC600)
9415    return arc600_corereg_hazard (pred, succ);
9416
9417  return 0;
9418}
9419
9420/* Return length adjustment for INSN.  */
9421
9422int
9423arc_adjust_insn_length (rtx_insn *insn, int len, bool)
9424{
9425  if (!INSN_P (insn))
9426    return len;
9427  /* We already handle sequences by ignoring the delay sequence flag.  */
9428  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9429    return len;
9430
9431  /* Check for return with but one preceding insn since function
9432     start / call.  */
9433  if (TARGET_PAD_RETURN
9434      && JUMP_P (insn)
9435      && GET_CODE (PATTERN (insn)) != ADDR_VEC
9436      && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9437      && get_attr_type (insn) == TYPE_RETURN)
9438    {
9439      rtx_insn *prev = prev_active_insn (insn);
9440
9441      if (!prev || !(prev = prev_active_insn (prev))
9442	  || ((NONJUMP_INSN_P (prev)
9443	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
9444	      ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
9445			   NON_SIBCALL)
9446	      : CALL_ATTR (prev, NON_SIBCALL)))
9447	return len + 4;
9448    }
9449  if (TARGET_ARC600)
9450    {
9451      rtx_insn *succ = next_real_insn (insn);
9452
      /* On the ARC600, a write to an extension register must be separated
9454	 from a read.  */
9455      if (succ && INSN_P (succ))
9456	len += arc600_corereg_hazard (insn, succ);
9457    }
9458
9459  /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
9460     can go awry.  */
9461  extract_constrain_insn_cached (insn);
9462
9463  return len;
9464}
9465
9466/* Return a copy of COND from *STATEP, inverted if that is indicated by the
9467   CC field of *STATEP.  */
9468
9469static rtx
9470arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
9471{
9472  rtx cond = statep->cond;
9473  int raw_cc = get_arc_condition_code (cond);
9474  if (reverse)
9475    raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
9476
9477  if (statep->cc == raw_cc)
9478    return copy_rtx (cond);
9479
9480  gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
9481
9482  machine_mode ccm = GET_MODE (XEXP (cond, 0));
9483  enum rtx_code code = reverse_condition (GET_CODE (cond));
9484  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
9485    code = reverse_condition_maybe_unordered (GET_CODE (cond));
9486
9487  return gen_rtx_fmt_ee (code, GET_MODE (cond),
9488			 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
9489}
9490
9491/* Return version of PAT conditionalized with COND, which is part of INSN.
9492   ANNULLED indicates if INSN is an annulled delay-slot insn.
9493   Register further changes if necessary.  */
9494static rtx
9495conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
9496{
9497  /* For commutative operators, we generally prefer to have
9498     the first source match the destination.  */
9499  if (GET_CODE (pat) == SET)
9500    {
9501      rtx src = SET_SRC (pat);
9502
9503      if (COMMUTATIVE_P (src))
9504	{
9505	  rtx src0 = XEXP (src, 0);
9506	  rtx src1 = XEXP (src, 1);
9507	  rtx dst = SET_DEST (pat);
9508
9509	  if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
9510	      /* Leave add_n alone - the canonical form is to
9511		 have the complex summand first.  */
9512	      && REG_P (src0))
9513	    pat = gen_rtx_SET (dst,
9514			       gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
9515					       src1, src0));
9516	}
9517    }
9518
9519  /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
9520     what to do with COND_EXEC.  */
9521  if (RTX_FRAME_RELATED_P (insn))
9522    {
      /* If this is the delay slot insn of an annulled branch,
	 dwarf2out.c:scan_trace understands the annulling semantics
	 without the COND_EXEC.  */
9526      gcc_assert (annulled);
9527      rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
9528				 REG_NOTES (insn));
9529      validate_change (insn, &REG_NOTES (insn), note, 1);
9530    }
9531  pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
9532  return pat;
9533}
9534
9535/* Use the ccfsm machinery to do if conversion.  */
9536
9537static unsigned
9538arc_ifcvt (void)
9539{
9540  struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
9541
9542  memset (statep, 0, sizeof *statep);
9543  for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
9544    {
9545      arc_ccfsm_advance (insn, statep);
9546
9547      switch (statep->state)
9548	{
9549	case 0:
9550	  break;
9551	case 1: case 2:
9552	  {
9553	    /* Deleted branch.  */
9554	    arc_ccfsm_post_advance (insn, statep);
9555	    gcc_assert (!IN_RANGE (statep->state, 1, 2));
9556	    rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
9557	    if (GET_CODE (PATTERN (seq)) == SEQUENCE)
9558	      {
9559		rtx slot = XVECEXP (PATTERN (seq), 0, 1);
9560		rtx pat = PATTERN (slot);
9561		if (INSN_ANNULLED_BRANCH_P (insn))
9562		  {
9563		    rtx cond
9564		      = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
9565		    pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
9566		  }
9567		if (!validate_change (seq, &PATTERN (seq), pat, 0))
9568		  gcc_unreachable ();
9569		PUT_CODE (slot, NOTE);
9570		NOTE_KIND (slot) = NOTE_INSN_DELETED;
9571	      }
9572	    else
9573	      {
9574		set_insn_deleted (insn);
9575	      }
9576	    continue;
9577	  }
9578	case 3:
9579	  if (LABEL_P (insn)
9580	      && statep->target_label == CODE_LABEL_NUMBER (insn))
9581	    {
9582	      arc_ccfsm_post_advance (insn, statep);
9583	      if (--LABEL_NUSES (insn) == 0)
9584		delete_insn (insn);
9585	      continue;
9586	    }
9587	  /* Fall through.  */
9588	case 4: case 5:
9589	  if (!NONDEBUG_INSN_P (insn))
9590	    break;
9591
9592	  /* Conditionalized insn.  */
9593
9594	  rtx_insn *prev, *pprev;
9595	  rtx *patp, pat, cond;
9596	  bool annulled; annulled = false;
9597
9598	  /* If this is a delay slot insn in a non-annulled branch,
9599	     don't conditionalize it.  N.B., this should be fine for
9600	     conditional return too.  However, don't do this for
9601	     unconditional branches, as these would be encountered when
9602	     processing an 'else' part.  */
9603	  prev = PREV_INSN (insn);
9604	  pprev = PREV_INSN (prev);
9605	  if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
9606	      && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
9607	    {
9608	      if (!INSN_ANNULLED_BRANCH_P (prev))
9609		break;
9610	      annulled = true;
9611	    }
9612
9613	  patp = &PATTERN (insn);
9614	  pat = *patp;
9615	  cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
9616	  if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9617	    {
9618	      /* ??? don't conditionalize if all side effects are dead
9619		 in the not-execute case.  */
9620
9621	      pat = conditionalize_nonjump (pat, cond, insn, annulled);
9622	    }
9623	  else if (simplejump_p (insn))
9624	    {
9625	      patp = &SET_SRC (pat);
9626	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
9627	    }
9628	  else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
9629	    {
9630	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
9631	      pat = gen_rtx_SET (pc_rtx, pat);
9632	    }
9633	  else
9634	    gcc_unreachable ();
9635	  validate_change (insn, patp, pat, 1);
9636	  if (!apply_change_group ())
9637	    gcc_unreachable ();
9638	  if (JUMP_P (insn))
9639	    {
9640	      rtx_insn *next = next_nonnote_insn (insn);
9641	      if (GET_CODE (next) == BARRIER)
9642		delete_insn (next);
9643	      if (statep->state == 3)
9644		continue;
9645	    }
9646	  break;
9647	default:
9648	  gcc_unreachable ();
9649	}
9650      arc_ccfsm_post_advance (insn, statep);
9651    }
9652  return 0;
9653}
9654
9655/* Find annulled delay insns and convert them to use the appropriate predicate.
9656   This allows branch shortening to size up these insns properly.  */
9657
9658static unsigned
9659arc_predicate_delay_insns (void)
9660{
9661  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9662    {
9663      rtx pat, jump, dlay, src, cond, *patp;
9664      int reverse;
9665
9666      if (!NONJUMP_INSN_P (insn)
9667	  || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
9668	continue;
9669      jump = XVECEXP (pat, 0, 0);
9670      dlay = XVECEXP (pat, 0, 1);
9671      if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
9672	continue;
9673      /* If the branch insn does the annulling, leave the delay insn alone.  */
9674      if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
9675	continue;
9676      /* ??? Could also leave DLAY un-conditionalized if its target is dead
9677	 on the other path.  */
9678      gcc_assert (GET_CODE (PATTERN (jump)) == SET);
9679      gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
9680      src = SET_SRC (PATTERN (jump));
9681      gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
9682      cond = XEXP (src, 0);
9683      if (XEXP (src, 2) == pc_rtx)
9684	reverse = 0;
9685      else if (XEXP (src, 1) == pc_rtx)
9686	reverse = 1;
9687      else
9688	gcc_unreachable ();
9689      if (reverse != !INSN_FROM_TARGET_P (dlay))
9690	{
9691	  machine_mode ccm = GET_MODE (XEXP (cond, 0));
9692	  enum rtx_code code = reverse_condition (GET_CODE (cond));
9693	  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
9694	    code = reverse_condition_maybe_unordered (GET_CODE (cond));
9695
9696	  cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
9697				 copy_rtx (XEXP (cond, 0)),
9698				 copy_rtx (XEXP (cond, 1)));
9699	}
9700      else
9701	cond = copy_rtx (cond);
9702      patp = &PATTERN (dlay);
9703      pat = *patp;
9704      pat = conditionalize_nonjump (pat, cond, dlay, true);
9705      validate_change (dlay, patp, pat, 1);
9706      if (!apply_change_group ())
9707	gcc_unreachable ();
9708    }
9709  return 0;
9710}
9711
9712/* For ARC600: If a write to a core reg >=32 appears in a delay slot
9713  (other than of a forward brcc), it creates a hazard when there is a read
9714  of the same register at the branch target.  We can't know what is at the
9715  branch target of calls, and for branches, we don't really know before the
  end of delay slot scheduling, either.  Not only can individual instructions
9717  be hoisted out into a delay slot, a basic block can also be emptied this
9718  way, and branch and/or fall through targets be redirected.  Hence we don't
9719  want such writes in a delay slot.  */
9720
/* Return nonzero iff INSN writes to an extension core register.  */
9722
9723int
9724arc_write_ext_corereg (rtx insn)
9725{
9726  subrtx_iterator::array_type array;
9727  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
9728    {
9729      const_rtx x = *iter;
9730      switch (GET_CODE (x))
9731	{
9732	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
9733	  break;
9734	default:
9735	  /* This is also fine for PRE/POST_MODIFY, because they
9736	     contain a SET.  */
9737	  continue;
9738	}
9739      const_rtx dest = XEXP (x, 0);
9740      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
9741	return 1;
9742    }
9743  return 0;
9744}
9745
9746/* This is like the hook, but returns NULL when it can't / won't generate
9747   a legitimate address.  */
9748
9749static rtx
9750arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9751			  machine_mode mode)
9752{
9753  rtx addr, inner;
9754
9755  addr = x;
9756  if (GET_CODE (addr) == CONST)
9757    addr = XEXP (addr, 0);
9758
9759  if (GET_CODE (addr) == PLUS
9760      && CONST_INT_P (XEXP (addr, 1))
9761      && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
9762	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
9763	  || (REG_P (XEXP (addr, 0))
9764	      && (INTVAL (XEXP (addr, 1)) & 252))))
9765    {
9766      HOST_WIDE_INT offs, upper;
9767      int size = GET_MODE_SIZE (mode);
9768
9769      offs = INTVAL (XEXP (addr, 1));
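      /* ~511 * size parses as (~511) * size, i.e. -512 * size; UPPER below
	 is therefore OFFS + 256 * size rounded down to a multiple of
	 512 * size (SIZE being a power of two here), which keeps
	 OFFS - UPPER within +/- 256 * size.  */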
9770      upper = (offs + 256 * size) & ~511 * size;
9771      inner = plus_constant (Pmode, XEXP (addr, 0), upper);
9772#if 0 /* ??? this produces worse code for EEMBC idctrn01  */
9773      if (GET_CODE (x) == CONST)
9774	inner = gen_rtx_CONST (Pmode, inner);
9775#endif
9776      addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
9777      x = addr;
9778    }
9779  else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
9780    x = force_reg (Pmode, x);
9781  if (memory_address_p ((machine_mode) mode, x))
9782     return x;
9783  return NULL_RTX;
9784}
9785
9786static rtx
9787arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
9788{
9789  rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
9790
9791  if (new_x)
9792    return new_x;
9793  return orig_x;
9794}
9795
9796static rtx
9797arc_delegitimize_address_0 (rtx op)
9798{
9799  switch (GET_CODE (op))
9800    {
9801    case CONST:
9802      return arc_delegitimize_address_0 (XEXP (op, 0));
9803
9804    case UNSPEC:
9805      switch (XINT (op, 1))
9806	{
9807	case ARC_UNSPEC_GOT:
9808	case ARC_UNSPEC_GOTOFFPC:
9809	  return XVECEXP (op, 0, 0);
9810	default:
9811	  break;
9812	}
9813      break;
9814
9815    case PLUS:
9816      {
9817	rtx t1 = arc_delegitimize_address_0 (XEXP (op, 0));
9818	rtx t2 = XEXP (op, 1);
9819
9820	if (t1 && t2)
9821	  return gen_rtx_PLUS (GET_MODE (op), t1, t2);
9822	break;
9823      }
9824
9825    default:
9826      break;
9827    }
9828  return NULL_RTX;
9829}
9830
9831static rtx
9832arc_delegitimize_address (rtx orig_x)
9833{
9834  rtx x = orig_x;
9835
9836  if (MEM_P (x))
9837    x = XEXP (x, 0);
9838
9839  x = arc_delegitimize_address_0 (x);
9840  if (!x)
9841    return orig_x;
9842
9843  if (MEM_P (orig_x))
9844    x = replace_equiv_address_nv (orig_x, x);
9845  return x;
9846}
9847
9848/* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
9849   differ from the hardware register number in order to allow the generic
9850   code to correctly split the concatenation of acc1 and acc2.  */
9851
9852rtx
9853gen_acc1 (void)
9854{
9855  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
9856}
9857
9858/* Return a REG rtx for acc2.  N.B. the gcc-internal representation may
9859   differ from the hardware register number in order to allow the generic
9860   code to correctly split the concatenation of acc1 and acc2.  */
9861
9862rtx
9863gen_acc2 (void)
9864{
9865  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
9866}
9867
9868/* FIXME: a parameter should be added, and code added to final.c,
9869   to reproduce this functionality in shorten_branches.  */
9870#if 0
9871/* Return nonzero iff BRANCH should be unaligned if possible by upsizing
9872   a previous instruction.  */
9873int
9874arc_unalign_branch_p (rtx branch)
9875{
9876  rtx note;
9877
9878  if (!TARGET_UNALIGN_BRANCH)
9879    return 0;
9880  /* Do not do this if we have a filled delay slot.  */
9881  if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
9882      && !NEXT_INSN (branch)->deleted ())
9883    return 0;
9884  note = find_reg_note (branch, REG_BR_PROB, 0);
9885  return (!note
9886	  || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
9887	  || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
9888}
9889#endif
9890
9891/* When estimating sizes during arc_reorg, when optimizing for speed, there
9892   are three reasons why we need to consider branches to be length 6:
   - annul-false delay slot insns are implemented using conditional execution,
9894     thus preventing short insn formation where used.
9895   - for ARC600: annul-true delay slot insns are implemented where possible
9896     using conditional execution, preventing short insn formation where used.
9897   - for ARC700: likely or somewhat likely taken branches are made long and
9898     unaligned if possible to avoid branch penalty.  */
9899
9900bool
9901arc_branch_size_unknown_p (void)
9902{
9903  return !optimize_size && arc_reorg_in_progress;
9904}
9905
9906/* The usual; we set up our machine_function data.  */
9907
9908static struct machine_function *
9909arc_init_machine_status (void)
9910{
9911  struct machine_function *machine;
9912  machine = ggc_cleared_alloc<machine_function> ();
9913  machine->fn_type = ARC_FUNCTION_UNKNOWN;
9914
9915  return machine;
9916}
9917
9918/* Implements INIT_EXPANDERS.  We just set up to call the above
9919   function.  */
9920
9921void
9922arc_init_expanders (void)
9923{
9924  init_machine_status = arc_init_machine_status;
9925}
9926
/* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
   indicates a number of elements to ignore - that allows a sibcall
   pattern that starts with (return).  LOAD_P is zero for store
   multiples (for prologues), one for load multiples (for epilogues),
   and two for load multiples where no final clobber of blink is required.
9932   We also skip the first load / store element since this is supposed to
9933   be checked in the instruction pattern.  */
9934
9935int
9936arc_check_millicode (rtx op, int offset, int load_p)
9937{
9938  int len = XVECLEN (op, 0) - offset;
9939  int i;
9940
9941  if (load_p == 2)
9942    {
9943      if (len < 2 || len > 13)
9944	return 0;
9945      load_p = 1;
9946    }
9947  else
9948    {
9949      rtx elt = XVECEXP (op, 0, --len);
9950
9951      if (GET_CODE (elt) != CLOBBER
9952	  || !REG_P (XEXP (elt, 0))
9953	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
9954	  || len < 3 || len > 13)
9955	return 0;
9956    }
9957  for (i = 1; i < len; i++)
9958    {
9959      rtx elt = XVECEXP (op, 0, i + offset);
9960      rtx reg, mem, addr;
9961
9962      if (GET_CODE (elt) != SET)
9963	return 0;
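      /* ELT is a SET; for stores (LOAD_P == 0) the memory reference is the
	 destination, for loads it is the source.  */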
9964      mem = XEXP (elt, load_p);
9965      reg = XEXP (elt, 1-load_p);
9966      if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
9967	return 0;
9968      addr = XEXP (mem, 0);
9969      if (GET_CODE (addr) != PLUS
9970	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
9971	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
9972	return 0;
9973    }
9974  return 1;
9975}
9976
9977/* Accessor functions for cfun->machine->unalign.  */
9978
9979void
9980arc_clear_unalign (void)
9981{
9982  if (cfun)
9983    cfun->machine->unalign = 0;
9984}
9985
9986void
9987arc_toggle_unalign (void)
9988{
9989  cfun->machine->unalign ^= 2;
9990}
9991
/* Operands 0..2 are the operands of an addsi which uses a 12 bit
   constant in operand 2, but which would require a LIMM because of
   operand mismatch.
   Operands 3 and 4 are new SET_SRCs for operand 0.  */
9996
9997void
9998split_addsi (rtx *operands)
9999{
10000  int val = INTVAL (operands[2]);
10001
10002  /* Try for two short insns first.  Lengths being equal, we prefer
10003     expansions with shorter register lifetimes.  */
10004  if (val > 127 && val <= 255
10005      && satisfies_constraint_Rcq (operands[0]))
10006    {
10007      operands[3] = operands[2];
10008      operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
10009    }
10010  else
10011    {
10012      operands[3] = operands[1];
10013      operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
10014    }
10015}
10016
10017/* Operands 0..2 are the operands of a subsi which uses a 12 bit
10018   constant in operand 1, but which would require a LIMM because of
10019   operand mismatch.
   Operands 3 and 4 are new SET_SRCs for operand 0.  */
10021
10022void
10023split_subsi (rtx *operands)
10024{
10025  int val = INTVAL (operands[1]);
10026
10027  /* Try for two short insns first.  Lengths being equal, we prefer
10028     expansions with shorter register lifetimes.  */
10029  if (satisfies_constraint_Rcq (operands[0])
10030      && satisfies_constraint_Rcq (operands[2]))
10031    {
10032      if (val >= -31 && val <= 127)
10033	{
10034	  operands[3] = gen_rtx_NEG (SImode, operands[2]);
10035	  operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
10036	  return;
10037	}
10038      else if (val >= 0 && val < 255)
10039	{
10040	  operands[3] = operands[1];
10041	  operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
10042	  return;
10043	}
10044    }
10045  /* If the destination is not an ARCompact16 register, we might
10046     still have a chance to make a short insn if the source is;
     we need to start with a reg-reg move for this.  */
10048  operands[3] = operands[2];
10049  operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
10050}
10051
10052/* Handle DOUBLE_REGS uses.
10053   Operand 0: destination register
10054   Operand 1: source register  */
10055
10056static bool
10057arc_process_double_reg_moves (rtx *operands)
10058{
10059  enum usesDxState { none, srcDx, destDx, maxDx };
10060  enum usesDxState state = none;
10061  rtx dest = operands[0];
10062  rtx src  = operands[1];
10063
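  /* Hard registers 40..43 are the DOUBLE_REGS holding the DPFP Dx
     register pairs.  */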
10064  if (refers_to_regno_p (40, 44, src, 0))
10065    {
10066      state = srcDx;
10067      gcc_assert (REG_P (dest));
10068    }
10069  if (refers_to_regno_p (40, 44, dest, 0))
10070    {
10071      /* Via arc_register_move_cost, we should never see D,D moves.  */
10072      gcc_assert (REG_P (src));
10073      gcc_assert (state == none);
10074      state = destDx;
10075    }
10076
10077  if (state == none)
10078    return false;
10079
10080  if (state == srcDx)
10081    {
10082      /* Without the LR insn, we need to split this into a
10083	 sequence of insns which will use the DEXCLx and DADDHxy
10084	 insns to be able to read the Dx register in question.  */
10085      if (TARGET_DPFP_DISABLE_LRSR)
10086	{
10087	  /* gen *movdf_insn_nolrsr */
10088	  rtx set = gen_rtx_SET (dest, src);
10089	  rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
10090	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
10091	}
10092      else
10093	{
	  /* When we have 'mov r, D' or 'mov D, D', get the target
	     register pair for use with the LR insn.  */
10096	  rtx destHigh = simplify_gen_subreg (SImode, dest, DFmode,
10097					     TARGET_BIG_ENDIAN ? 0 : 4);
10098	  rtx destLow  = simplify_gen_subreg (SImode, dest, DFmode,
10099					     TARGET_BIG_ENDIAN ? 4 : 0);
10100
10101	  /* Produce the two LR insns to get the high and low parts.  */
10102	  emit_insn (gen_rtx_SET (destHigh,
10103				  gen_rtx_UNSPEC_VOLATILE (Pmode,
10104							   gen_rtvec (1, src),
10105				  VUNSPEC_ARC_LR_HIGH)));
10106	  emit_insn (gen_rtx_SET (destLow,
10107				  gen_rtx_UNSPEC_VOLATILE (Pmode,
10108							   gen_rtvec (1, src),
10109				  VUNSPEC_ARC_LR)));
10110	}
10111    }
10112  else if (state == destDx)
10113    {
      /* When we have 'mov D, r' or 'mov D, D', get the source register
	 pair and write the Dx register with the DEXCL insn.  */
10116      rtx srcHigh = simplify_gen_subreg (SImode, src, DFmode,
10117					TARGET_BIG_ENDIAN ? 0 : 4);
10118      rtx srcLow  = simplify_gen_subreg (SImode, src, DFmode,
10119					TARGET_BIG_ENDIAN ? 4 : 0);
10120
10121      emit_insn (gen_dexcl_2op (dest, srcHigh, srcLow));
10122    }
10123  else
10124    gcc_unreachable ();
10125
10126  return true;
10127}
10128
/* Operands 0..1 are the operands of a 64 bit move instruction.
   Split it into two moves with operands 2/3 and 4/5.  */
10131
10132void
10133arc_split_move (rtx *operands)
10134{
10135  machine_mode mode = GET_MODE (operands[0]);
10136  int i;
10137  int swap = 0;
10138  rtx xop[4];
10139
10140  if (TARGET_DPFP)
10141  {
10142    if (arc_process_double_reg_moves (operands))
10143      return;
10144  }
10145
10146  if (TARGET_LL64
10147      && ((memory_operand (operands[0], mode)
10148	   && (even_register_operand (operands[1], mode)
10149	       || satisfies_constraint_Cm3 (operands[1])))
10150	  || (memory_operand (operands[1], mode)
10151	      && even_register_operand (operands[0], mode))))
10152    {
10153      emit_move_insn (operands[0], operands[1]);
10154      return;
10155    }
10156
10157  if (TARGET_PLUS_QMACW
10158      && GET_CODE (operands[1]) == CONST_VECTOR)
10159    {
10160      HOST_WIDE_INT intval0, intval1;
10161      if (GET_MODE (operands[1]) == V2SImode)
10162	{
10163	  intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
10164	  intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
10165	}
10166      else
10167	{
10168	  intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
10169	  intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
10170	  intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
10171	  intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
10172	}
10173      xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
10174      xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
10175      xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
10176      xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
10177      emit_move_insn (xop[0], xop[2]);
10178      emit_move_insn (xop[3], xop[1]);
10179      return;
10180    }
10181
10182  for (i = 0; i < 2; i++)
10183    {
10184      if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
10185	{
10186	  rtx addr = XEXP (operands[i], 0);
10187	  rtx r, o;
10188	  enum rtx_code code;
10189
10190	  gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
10191	  switch (GET_CODE (addr))
10192	    {
10193	    case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
10194	    case PRE_INC: o = GEN_INT (8); goto pre_modify;
10195	    case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
10196	    pre_modify:
10197	      code = PRE_MODIFY;
10198	      break;
10199	    case POST_DEC: o = GEN_INT (-8); goto post_modify;
10200	    case POST_INC: o = GEN_INT (8); goto post_modify;
10201	    case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
10202	    post_modify:
10203	      code = POST_MODIFY;
10204	      swap = 2;
10205	      break;
10206	    default:
10207	      gcc_unreachable ();
10208	    }
10209	  r = XEXP (addr, 0);
10210	  xop[0+i] = adjust_automodify_address_nv
10211		      (operands[i], SImode,
10212		       gen_rtx_fmt_ee (code, Pmode, r,
10213				       gen_rtx_PLUS (Pmode, r, o)),
10214		       0);
10215	  xop[2+i] = adjust_automodify_address_nv
10216		      (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
10217	}
10218      else
10219	{
10220	  xop[0+i] = operand_subword (operands[i], 0, 0, mode);
10221	  xop[2+i] = operand_subword (operands[i], 1, 0, mode);
10222	}
10223    }
10224  if (reg_overlap_mentioned_p (xop[0], xop[3]))
10225    {
10226      swap = 2;
10227      gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
10228    }
10229
10230  emit_move_insn (xop[0 + swap], xop[1 + swap]);
10231  emit_move_insn (xop[2 - swap], xop[3 - swap]);
10232
10233}
10234
10235/* Select between the instruction output templates s_tmpl (for short INSNs)
10236   and l_tmpl (for long INSNs).  */
10237
10238const char *
10239arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
10240{
10241  int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
10242
10243  extract_constrain_insn_cached (insn);
10244  return is_short ? s_tmpl : l_tmpl;
10245}
10246
10247/* Searches X for any reference to REGNO, returning the rtx of the
10248   reference found if any.  Otherwise, returns NULL_RTX.  */
10249
10250rtx
10251arc_regno_use_in (unsigned int regno, rtx x)
10252{
10253  const char *fmt;
10254  int i, j;
10255  rtx tem;
10256
10257  if (REG_P (x) && refers_to_regno_p (regno, x))
10258    return x;
10259
10260  fmt = GET_RTX_FORMAT (GET_CODE (x));
10261  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10262    {
10263      if (fmt[i] == 'e')
10264	{
10265	  if ((tem = regno_use_in (regno, XEXP (x, i))))
10266	    return tem;
10267	}
10268      else if (fmt[i] == 'E')
10269	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10270	  if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
10271	    return tem;
10272    }
10273
10274  return NULL_RTX;
10275}
10276
10277/* Return the integer value of the "type" attribute for INSN, or -1 if
10278   INSN can't have attributes.  */
10279
10280static int
10281arc_attr_type (rtx_insn *insn)
10282{
10283  if (NONJUMP_INSN_P (insn)
10284      ? (GET_CODE (PATTERN (insn)) == USE
10285	 || GET_CODE (PATTERN (insn)) == CLOBBER)
10286      : JUMP_P (insn)
10287      ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
10288	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
10289      : !CALL_P (insn))
10290    return -1;
10291  return get_attr_type (insn);
10292}
10293
10294/* Code has a minimum p2 alignment of 1, which we must restore after
10295   an ADDR_DIFF_VEC.  */
10296
10297int
10298arc_label_align (rtx_insn *label)
10299{
10300  if (align_labels.levels[0].log < 1)
10301    {
10302      rtx_insn *next = next_nonnote_nondebug_insn (label);
10303      if (INSN_P (next) && recog_memoized (next) >= 0)
10304	return 1;
10305    }
10306  return align_labels.levels[0].log;
10307}
10308
10309/* Return true if LABEL is in executable code.  */
10310
10311bool
10312arc_text_label (rtx_insn *label)
10313{
10314  rtx_insn *next;
10315
10316  /* ??? We use deleted labels like they were still there, see
10317     gcc.c-torture/compile/20000326-2.c .  */
10318  gcc_assert (GET_CODE (label) == CODE_LABEL
10319	      || (GET_CODE (label) == NOTE
10320		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
10321  next = next_nonnote_insn (label);
10322  if (next)
10323    return (!JUMP_TABLE_DATA_P (next)
10324	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
10325  else if (!PREV_INSN (label))
10326    /* ??? sometimes text labels get inserted very late, see
10327       gcc.dg/torture/stackalign/comp-goto-1.c */
10328    return true;
10329  return false;
10330}
10331
10332/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
10333  when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
10334  -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
10335  to redirect two breqs.  */
10336
10337static bool
10338arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
10339{
10340  /* ??? get_attr_type is declared to take an rtx.  */
10341  union { const rtx_insn *c; rtx_insn *r; } u;
10342
10343  u.c = follower;
10344  if (CROSSING_JUMP_P (followee))
10345    switch (get_attr_type (u.r))
10346      {
10347      case TYPE_BRANCH:
10348	if (get_attr_length (u.r) != 2)
10349	  break;
      /* Fall through.  */
10351      case TYPE_BRCC:
10352      case TYPE_BRCC_NO_DELAY_SLOT:
10353	return false;
10354      default:
10355	return true;
10356      }
10357  return true;
10358}
10359
10360
10361/* Implement EPILOGUE_USES.
10362   Return true if REGNO should be added to the deemed uses of the epilogue.
10363
10364   We have to make sure all the register restore instructions are
10365   known to be live in interrupt functions, plus the blink register if
10366   it is clobbered by the isr.  */
10367
10368bool
10369arc_epilogue_uses (int regno)
10370{
10371  unsigned int fn_type;
10372  fn_type = arc_compute_function_type (cfun);
10373
10374  if (regno == arc_tp_regno)
10375    return true;
10376
10377  if (regno == RETURN_ADDR_REGNUM)
10378    return true;
10379
10380  if (regno == arc_return_address_register (fn_type))
10381    return true;
10382
10383  if (epilogue_completed && ARC_INTERRUPT_P (fn_type))
10384    {
10385      /* An interrupt function restores more registers.  */
10386      if (df_regs_ever_live_p (regno) || call_used_or_fixed_reg_p (regno))
10387	return true;
10388    }
10389
10390  return false;
10391}
10392
10393/* Helper for EH_USES macro.  */
10394
10395bool
10396arc_eh_uses (int regno)
10397{
10398  if (regno == arc_tp_regno)
10399    return true;
10400  return false;
10401}
10402
10403/* Return true if we use LRA instead of reload pass.  */
10404
10405bool
10406arc_lra_p (void)
10407{
10408  return arc_lra_flag;
10409}
10410
/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
   Rcq registers, because some insns are shorter with them.  OTOH we already
   have separate alternatives for this purpose, and other insns don't
   mind, so maybe we should rather prefer the other registers?
   We need more data, and we can only get that if we allow people to
   try all options.  */
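/* The expression ((((r & 7) ^ 4) - 4) & 15) == r is true exactly for
   r0-r3 and r12-r15, i.e. the registers that the 16-bit "compact"
   encodings can address.  For example, r = 13: (13 & 7) = 5, 5 ^ 4 = 1,
   1 - 4 = -3, -3 & 15 = 13, which equals r; for r = 5 the same chain
   yields 13 != 5.  ARC_LRA_PRIORITY_NONCOMPACT therefore gives priority
   to the non-compact registers and ARC_LRA_PRIORITY_COMPACT to the
   compact ones.  */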
10417static int
10418arc_register_priority (int r)
10419{
10420  switch (arc_lra_priority_tag)
10421    {
10422    case ARC_LRA_PRIORITY_NONE:
10423      return 0;
10424    case ARC_LRA_PRIORITY_NONCOMPACT:
10425      return ((((r & 7) ^ 4) - 4) & 15) != r;
10426    case ARC_LRA_PRIORITY_COMPACT:
10427      return ((((r & 7) ^ 4) - 4) & 15) == r;
10428    default:
10429      gcc_unreachable ();
10430    }
10431}
10432
10433static reg_class_t
10434arc_spill_class (reg_class_t /* orig_class */, machine_mode)
10435{
10436  return GENERAL_REGS;
10437}
10438
10439bool
10440arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10441			       int itype)
10442{
10443  rtx x = *p;
10444  enum reload_type type = (enum reload_type) itype;
10445
10446  if (GET_CODE (x) == PLUS
10447      && CONST_INT_P (XEXP (x, 1))
10448      && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
10449	  || (REG_P (XEXP (x, 0))
10450	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
10451    {
10452      int scale = GET_MODE_SIZE (mode);
10453      int shift;
10454      rtx index_rtx = XEXP (x, 1);
10455      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
10456      rtx reg, sum, sum2;
10457
10458      if (scale > 4)
10459	scale = 4;
10460      if ((scale-1) & offset)
10461	scale = 1;
10462      shift = scale >> 1;
10463      offset_base
10464	= ((offset + (256 << shift))
10465	   & ((HOST_WIDE_INT)((unsigned HOST_WIDE_INT) -512 << shift)));
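      /* For example, for an SImode access at offset 3000 we get scale = 4,
	 shift = 2 and offset_base = (3000 + 1024) & -2048 = 2048, so the
	 address is rewritten below as (base + 2048) + 952; the range check
	 holds since 4 + 952 <= 1024.  */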
10466      /* Sometimes the normal form does not suit DImode.  We
10467	 could avoid that by using smaller ranges, but that
10468	 would give less optimized code when SImode is
10469	 prevalent.  */
10470      if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
10471	{
10472	  int regno;
10473
10474	  reg = XEXP (x, 0);
10475	  regno = REGNO (reg);
10476	  sum2 = sum = plus_constant (Pmode, reg, offset_base);
10477
10478	  if (reg_equiv_constant (regno))
10479	    {
10480	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
10481				    offset_base);
10482	      if (GET_CODE (sum2) == PLUS)
10483		sum2 = gen_rtx_CONST (Pmode, sum2);
10484	    }
10485	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
10486	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
10487		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
10488		       type);
10489	  return true;
10490	}
10491    }
10492  /* We must re-recognize what we created before.  */
10493  else if (GET_CODE (x) == PLUS
10494	   && GET_CODE (XEXP (x, 0)) == PLUS
10495	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10496	   && REG_P  (XEXP (XEXP (x, 0), 0))
10497	   && CONST_INT_P (XEXP (x, 1)))
10498    {
10499      /* Because this address is so complex, we know it must have
10500	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10501	 it is already unshared, and needs no further unsharing.  */
10502      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10503		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10504      return true;
10505    }
10506  return false;
10507}
10508
10509/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
10510
10511static bool
10512arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10513				    unsigned int align,
10514				    enum by_pieces_operation op,
10515				    bool speed_p)
10516{
10517  /* Let the cpymem expander handle small block moves.  */
10518  if (op == MOVE_BY_PIECES)
10519    return false;
10520
10521  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10522}
10523
10524/* Emit a (pre) memory barrier around an atomic sequence according to
10525   MODEL.  */
10526
10527static void
10528arc_pre_atomic_barrier (enum memmodel model)
10529{
10530  if (need_atomic_barrier_p (model, true))
10531    emit_insn (gen_memory_barrier ());
10532}
10533
10534/* Emit a (post) memory barrier around an atomic sequence according to
10535   MODEL.  */
10536
10537static void
10538arc_post_atomic_barrier (enum memmodel model)
10539{
10540  if (need_atomic_barrier_p (model, false))
10541    emit_insn (gen_memory_barrier ());
10542}
10543
/* Emit INSN as a jump insn and mark it as very unlikely to be taken.  */
10545
10546static void
10547emit_unlikely_jump (rtx insn)
10548{
10549  rtx_insn *jump = emit_jump_insn (insn);
10550  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
10551}
10552
/* Expand code to perform an 8 or 16-bit compare and swap by doing
   32-bit compare and swap on the word containing the byte or
   half-word.  The difference between a weak and a strong CAS is that
   the weak version may simply fail.  The strong version relies on two
   loops, one checks whether the SCOND op succeeded, the other checks
   whether the 32-bit accessed location which contains the 8 or 16-bit
   datum is not changed by another thread.  The first loop is
   implemented by the atomic_compare_and_swapsi_1 pattern.  The second
   loop is implemented by this routine.  */
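
/* As an illustration of the scheme below: for a little-endian QImode
   CAS at address A, ADDR becomes A & -4 and OFF becomes (A & 3) * 8;
   MASK is 0xff shifted left by OFF, the datum field is cleared in the
   word loaded from ADDR, OLDVAL and NEWVAL are shifted and masked into
   that field, and the 32-bit CAS is performed on the composed
   values.  */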
10562
10563static void
10564arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
10565				rtx oldval, rtx newval, rtx weak,
10566				rtx mod_s, rtx mod_f)
10567{
10568  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
10569  rtx addr = gen_reg_rtx (Pmode);
10570  rtx off = gen_reg_rtx (SImode);
10571  rtx oldv = gen_reg_rtx (SImode);
10572  rtx newv = gen_reg_rtx (SImode);
10573  rtx oldvalue = gen_reg_rtx (SImode);
10574  rtx newvalue = gen_reg_rtx (SImode);
10575  rtx res = gen_reg_rtx (SImode);
10576  rtx resv = gen_reg_rtx (SImode);
10577  rtx memsi, val, mask, end_label, loop_label, cc, x;
10578  machine_mode mode;
10579  bool is_weak = (weak != const0_rtx);
10580
  /* Align the address down to a 32-bit boundary.  */
10582  emit_insn (gen_rtx_SET (addr,
10583			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
10584
10585  /* Compute the datum offset.  */
10586  emit_insn (gen_rtx_SET (off,
10587			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
10588  if (TARGET_BIG_ENDIAN)
10589    emit_insn (gen_rtx_SET (off,
10590			    gen_rtx_MINUS (SImode,
10591					   (GET_MODE (mem) == QImode) ?
10592					   GEN_INT (3) : GEN_INT (2), off)));
10593
10594  /* Normal read from truncated address.  */
10595  memsi = gen_rtx_MEM (SImode, addr);
10596  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
10597  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
10598
10599  val = copy_to_reg (memsi);
10600
  /* Convert the offset to bits.  */
10602  emit_insn (gen_rtx_SET (off,
10603			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
10604
10605  /* Get the proper mask.  */
10606  if (GET_MODE (mem) == QImode)
10607    mask = force_reg (SImode, GEN_INT (0xff));
10608  else
10609    mask = force_reg (SImode, GEN_INT (0xffff));
10610
10611  emit_insn (gen_rtx_SET (mask,
10612			  gen_rtx_ASHIFT (SImode, mask, off)));
10613
10614  /* Prepare the old and new values.  */
10615  emit_insn (gen_rtx_SET (val,
10616			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
10617				       val)));
10618
10619  oldval = gen_lowpart (SImode, oldval);
10620  emit_insn (gen_rtx_SET (oldv,
10621			  gen_rtx_ASHIFT (SImode, oldval, off)));
10622
10623  newval = gen_lowpart_common (SImode, newval);
10624  emit_insn (gen_rtx_SET (newv,
10625			  gen_rtx_ASHIFT (SImode, newval, off)));
10626
10627  emit_insn (gen_rtx_SET (oldv,
10628			  gen_rtx_AND (SImode, oldv, mask)));
10629
10630  emit_insn (gen_rtx_SET (newv,
10631			  gen_rtx_AND (SImode, newv, mask)));
10632
10633  if (!is_weak)
10634    {
10635      end_label = gen_label_rtx ();
10636      loop_label = gen_label_rtx ();
10637      emit_label (loop_label);
10638    }
10639
10640  /* Make the old and new values.  */
10641  emit_insn (gen_rtx_SET (oldvalue,
10642			  gen_rtx_IOR (SImode, oldv, val)));
10643
10644  emit_insn (gen_rtx_SET (newvalue,
10645			  gen_rtx_IOR (SImode, newv, val)));
10646
  /* Try a 32-bit atomic compare and swap.  It clobbers the CC
10648     register.  */
10649  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
10650					      weak, mod_s, mod_f));
10651
10652  /* Regardless of the weakness of the operation, a proper boolean
10653     result needs to be provided.  */
10654  x = gen_rtx_REG (CC_Zmode, CC_REG);
10655  x = gen_rtx_EQ (SImode, x, const0_rtx);
10656  emit_insn (gen_rtx_SET (bool_result, x));
10657
10658  if (!is_weak)
10659    {
      /* Check the result: if the atomic op succeeded, jump to the end
	 label.  */
10662      x = gen_rtx_REG (CC_Zmode, CC_REG);
10663      x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
10664      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10665				gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
10666      emit_jump_insn (gen_rtx_SET (pc_rtx, x));
10667
10668      /* Wait for the right moment when the accessed 32-bit location
10669	 is stable.  */
10670      emit_insn (gen_rtx_SET (resv,
10671			      gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
10672					   res)));
10673      mode = SELECT_CC_MODE (NE, resv, val);
10674      cc = gen_rtx_REG (mode, CC_REG);
10675      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
10676
      /* Set the new value of the 32-bit location, properly masked.  */
10678      emit_insn (gen_rtx_SET (val, resv));
10679
10680      /* Try again if location is unstable.  Fall through if only
10681	 scond op failed.  */
10682      x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
10683      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10684				gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
10685      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10686
10687      emit_label (end_label);
10688    }
10689
  /* Finally, extract and return the result for the given mode.  */
10691  emit_insn (gen_rtx_SET (res,
10692			  gen_rtx_AND (SImode, res, mask)));
10693
10694  emit_insn (gen_rtx_SET (res,
10695			  gen_rtx_LSHIFTRT (SImode, res, off)));
10696
10697  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
10698}
10699
10700/* Helper function used by "atomic_compare_and_swap" expand
10701   pattern.  */
10702
10703void
10704arc_expand_compare_and_swap (rtx operands[])
10705{
10706  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
10707  machine_mode mode;
10708
10709  bval = operands[0];
10710  rval = operands[1];
10711  mem = operands[2];
10712  oldval = operands[3];
10713  newval = operands[4];
10714  is_weak = operands[5];
10715  mod_s = operands[6];
10716  mod_f = operands[7];
10717  mode = GET_MODE (mem);
10718
10719  if (reg_overlap_mentioned_p (rval, oldval))
10720    oldval = copy_to_reg (oldval);
10721
10722  if (mode == SImode)
10723    {
10724      emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
10725						  is_weak, mod_s, mod_f));
10726      x = gen_rtx_REG (CC_Zmode, CC_REG);
10727      x = gen_rtx_EQ (SImode, x, const0_rtx);
10728      emit_insn (gen_rtx_SET (bval, x));
10729    }
10730  else
10731    {
10732      arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
10733				      is_weak, mod_s, mod_f);
10734    }
10735}
10736
10737/* Helper function used by the "atomic_compare_and_swapsi_1"
10738   pattern.  */
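
/* A sketch of the sequence emitted for a strong (!IS_WEAK) SImode CAS,
   assuming the exclusive load/store patterns correspond to the LLOCK
   and SCOND instructions:

       1: llock   rval, [mem]
	  brne    rval, oldval, 2f
	  scond   newval, [mem]
	  bne     1b
       2:

   plus the memory barriers required by the memory model.  For a weak
   CAS the backward branch (and label 1) is omitted.  */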
10739
10740void
10741arc_split_compare_and_swap (rtx operands[])
10742{
10743  rtx rval, mem, oldval, newval;
10744  machine_mode mode;
10745  enum memmodel mod_s, mod_f;
10746  bool is_weak;
10747  rtx label1, label2, x, cond;
10748
10749  rval = operands[0];
10750  mem = operands[1];
10751  oldval = operands[2];
10752  newval = operands[3];
10753  is_weak = (operands[4] != const0_rtx);
10754  mod_s = (enum memmodel) INTVAL (operands[5]);
10755  mod_f = (enum memmodel) INTVAL (operands[6]);
10756  mode = GET_MODE (mem);
10757
10758  /* ARC atomic ops work only with 32-bit aligned memories.  */
10759  gcc_assert (mode == SImode);
10760
10761  arc_pre_atomic_barrier (mod_s);
10762
10763  label1 = NULL_RTX;
10764  if (!is_weak)
10765    {
10766      label1 = gen_label_rtx ();
10767      emit_label (label1);
10768    }
10769  label2 = gen_label_rtx ();
10770
10771  /* Load exclusive.  */
10772  emit_insn (gen_arc_load_exclusivesi (rval, mem));
10773
10774  /* Check if it is oldval.  */
10775  mode = SELECT_CC_MODE (NE, rval, oldval);
10776  cond = gen_rtx_REG (mode, CC_REG);
10777  emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
10778
10779  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10780  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10781			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
10782  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10783
10784  /* Exclusively store new item.  Store clobbers CC reg.  */
10785  emit_insn (gen_arc_store_exclusivesi (mem, newval));
10786
10787  if (!is_weak)
10788    {
10789      /* Check the result of the store.  */
10790      cond = gen_rtx_REG (CC_Zmode, CC_REG);
10791      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10792      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10793				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
10794      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10795    }
10796
10797  if (mod_f != MEMMODEL_RELAXED)
10798    emit_label (label2);
10799
10800  arc_post_atomic_barrier (mod_s);
10801
10802  if (mod_f == MEMMODEL_RELAXED)
10803    emit_label (label2);
10804}
10805
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */
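
/* Note that CODE == NOT requests an atomic NAND: the loop below
   computes AFTER = ~(BEFORE & VAL), which matches the semantics of
   __atomic_fetch_nand.  A MINUS of a constant VAL is rewritten as a
   PLUS of the negated constant.  */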
10811
10812void
10813arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
10814			 rtx orig_before, rtx orig_after, rtx model_rtx)
10815{
10816  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
10817  machine_mode mode = GET_MODE (mem);
10818  rtx label, x, cond;
10819  rtx before = orig_before, after = orig_after;
10820
10821  /* ARC atomic ops work only with 32-bit aligned memories.  */
10822  gcc_assert (mode == SImode);
10823
10824  arc_pre_atomic_barrier (model);
10825
10826  label = gen_label_rtx ();
10827  emit_label (label);
10828  label = gen_rtx_LABEL_REF (VOIDmode, label);
10829
10830  if (before == NULL_RTX)
10831    before = gen_reg_rtx (mode);
10832
10833  if (after == NULL_RTX)
10834    after = gen_reg_rtx (mode);
10835
10836  /* Load exclusive.  */
10837  emit_insn (gen_arc_load_exclusivesi (before, mem));
10838
10839  switch (code)
10840    {
10841    case NOT:
10842      x = gen_rtx_AND (mode, before, val);
10843      emit_insn (gen_rtx_SET (after, x));
10844      x = gen_rtx_NOT (mode, after);
10845      emit_insn (gen_rtx_SET (after, x));
10846      break;
10847
10848    case MINUS:
10849      if (CONST_INT_P (val))
10850	{
10851	  val = GEN_INT (-INTVAL (val));
10852	  code = PLUS;
10853	}
10854
10855      /* FALLTHRU.  */
10856    default:
10857      x = gen_rtx_fmt_ee (code, mode, before, val);
10858      emit_insn (gen_rtx_SET (after, x));
10859      break;
10860   }
10861
10862  /* Exclusively store new item.  Store clobbers CC reg.  */
10863  emit_insn (gen_arc_store_exclusivesi (mem, after));
10864
10865  /* Check the result of the store.  */
10866  cond = gen_rtx_REG (CC_Zmode, CC_REG);
10867  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10868  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10869			    label, pc_rtx);
10870  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10871
10872  arc_post_atomic_barrier (model);
10873}
10874
10875/* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P.  */
10876
10877static bool
10878arc_no_speculation_in_delay_slots_p ()
10879{
10880  return true;
10881}
10882
10883/* Return a parallel of registers to represent where to find the
10884   register pieces if required, otherwise NULL_RTX.  */
10885
10886static rtx
10887arc_dwarf_register_span (rtx rtl)
10888{
10889   machine_mode mode = GET_MODE (rtl);
10890   unsigned regno;
10891   rtx p;
10892
10893   if (GET_MODE_SIZE (mode) != 8)
10894     return NULL_RTX;
10895
10896   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
10897   regno = REGNO (rtl);
10898   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
10899   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
10900
10901   return p;
10902}
10903
/* Return true if OP is an acceptable memory operand for ARCompact
   16-bit load instructions of MODE.

   AV2SHORT: TRUE if the address needs to fit into the new ARCv2 short
   non-scaled instructions.

   SCALED: TRUE if the address can be scaled.  */
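
/* For example, with AV2SHORT and SCALED false, an SImode access through
   [r1] or [r1,124] is accepted (compact base register, offset below 128
   and word-aligned), while [r1,-4], [r1,3] and [r8,4] are rejected.  */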
10911
10912bool
10913compact_memory_operand_p (rtx op, machine_mode mode,
10914			  bool av2short, bool scaled)
10915{
10916  rtx addr, plus0, plus1;
10917  int size, off;
10918
10919  /* Eliminate non-memory operations.  */
10920  if (GET_CODE (op) != MEM)
10921    return 0;
10922
10923  /* .di instructions have no 16-bit form.  */
10924  if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
10925    return false;
10926
  /* Likewise for uncached types.  */
10928  if (arc_is_uncached_mem_p (op))
10929    return false;
10930
10931  if (mode == VOIDmode)
10932    mode = GET_MODE (op);
10933
10934  size = GET_MODE_SIZE (mode);
10935
  /* Dword operations really put out 2 instructions, so eliminate
10937     them.  */
10938  if (size > UNITS_PER_WORD)
10939    return false;
10940
10941  /* Decode the address now.  */
10942  addr = XEXP (op, 0);
10943  switch (GET_CODE (addr))
10944    {
10945    case REG:
10946      return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
10947	      || COMPACT_GP_REG_P (REGNO (addr))
10948	      || (SP_REG_P (REGNO (addr)) && (size != 2)));
10949    case PLUS:
10950      plus0 = XEXP (addr, 0);
10951      plus1 = XEXP (addr, 1);
10952
10953      if ((GET_CODE (plus0) == REG)
10954	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
10955	      || COMPACT_GP_REG_P (REGNO (plus0)))
10956	  && ((GET_CODE (plus1) == REG)
10957	      && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
10958		  || COMPACT_GP_REG_P (REGNO (plus1)))))
10959	{
10960	  return !av2short;
10961	}
10962
10963      if ((GET_CODE (plus0) == REG)
10964	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
10965	      || (COMPACT_GP_REG_P (REGNO (plus0)) && !av2short)
10966	      || (IN_RANGE (REGNO (plus0), 0, 31) && av2short))
10967	  && (GET_CODE (plus1) == CONST_INT))
10968	{
10969	  bool valid = false;
10970
10971	  off = INTVAL (plus1);
10972
10973	  /* Negative offset is not supported in 16-bit load/store insns.  */
10974	  if (off < 0)
10975	    return 0;
10976
10977	  /* Only u5 immediates allowed in code density instructions.  */
10978	  if (av2short)
10979	    {
10980	      switch (size)
10981		{
10982		case 1:
10983		  return false;
10984		case 2:
10985		  /* This is an ldh_s.x instruction, check the u6
10986		     immediate.  */
10987		  if (COMPACT_GP_REG_P (REGNO (plus0)))
10988		    valid = true;
10989		  break;
10990		case 4:
10991		  /* Only u5 immediates allowed in 32bit access code
10992		     density instructions.  */
10993		  if (REGNO (plus0) <= 31)
10994		    return ((off < 32) && (off % 4 == 0));
10995		  break;
10996		default:
10997		  return false;
10998		}
10999	    }
11000	  else
11001	    if (COMPACT_GP_REG_P (REGNO (plus0)))
11002	      valid = true;
11003
11004	  if (valid)
11005	    {
11006
11007	      switch (size)
11008		{
11009		case 1:
11010		  return (off < 32);
11011		case 2:
		  /* The 6-bit constant gets shifted to fit the real
		     5-bit field.  Also check the alignment.  */
11014		  return ((off < 64) && (off % 2 == 0));
11015		case 4:
11016		  return ((off < 128) && (off % 4 == 0));
11017		default:
11018		  return false;
11019		}
11020	    }
11021	}
11022
11023      if (REG_P (plus0) && CONST_INT_P (plus1)
11024	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
11025	      || SP_REG_P (REGNO (plus0)))
11026	  && !av2short)
11027	{
11028	  off = INTVAL (plus1);
11029	  return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
11030	}
11031
11032      if ((GET_CODE (plus0) == MULT)
11033	  && (GET_CODE (XEXP (plus0, 0)) == REG)
11034	  && ((REGNO (XEXP (plus0, 0)) >= FIRST_PSEUDO_REGISTER)
11035	      || COMPACT_GP_REG_P (REGNO (XEXP (plus0, 0))))
11036	  && (GET_CODE (plus1) == REG)
11037	  && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
11038	      || COMPACT_GP_REG_P (REGNO (plus1))))
11039	return scaled;
    default:
      /* TODO: 'gp' and 'pcl' are yet to be supported as base address
	 operands for 16-bit load instructions.  */
      break;
11044    }
11045  return false;
11046}
11047
/* Return nonzero if a jli call should be generated for a call from
   the current function to the function whose symbol is PAT.  */
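
/* A sketch of typical usage (illustrative only):

     void f (void) __attribute__ ((jli_always));
     void g (void) __attribute__ ((jli_fixed (4)));

   Calls to F and G are then routed through the JLI mechanism instead
   of being emitted as plain BL calls.  */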
11050
11051bool
11052arc_is_jli_call_p (rtx pat)
11053{
11054  tree attrs;
11055  tree decl = SYMBOL_REF_DECL (pat);
11056
  /* If it is not a well-defined public function, then return false.  */
11058  if (!decl || !SYMBOL_REF_FUNCTION_P (pat) || !TREE_PUBLIC (decl))
11059    return false;
11060
11061  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
11062  if (lookup_attribute ("jli_always", attrs))
11063    return true;
11064
11065  if (lookup_attribute ("jli_fixed", attrs))
11066    return true;
11067
11068  return TARGET_JLI_ALWAYS;
11069}
11070
/* Handle a "jli" attribute; arguments as in struct
   attribute_spec.handler.  */
11073
11074static tree
11075arc_handle_jli_attribute (tree *node ATTRIBUTE_UNUSED,
11076			  tree name, tree args, int,
11077			  bool *no_add_attrs)
11078{
11079  if (!TARGET_V2)
11080    {
11081      warning (OPT_Wattributes,
11082	       "%qE attribute only valid for ARCv2 architecture",
11083	       name);
11084      *no_add_attrs = true;
11085    }
11086
11087  if (args == NULL_TREE)
11088    {
11089      warning (OPT_Wattributes,
11090	       "argument of %qE attribute is missing",
11091	       name);
11092      *no_add_attrs = true;
11093    }
11094  else
11095    {
11096      if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
11097	TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
11098      tree arg = TREE_VALUE (args);
11099      if (TREE_CODE (arg) != INTEGER_CST)
11100	{
11101	  warning (0, "%qE attribute allows only an integer constant argument",
11102		   name);
11103	  *no_add_attrs = true;
11104	}
11105      /* FIXME! add range check.  TREE_INT_CST_LOW (arg) */
11106    }
11107   return NULL_TREE;
11108}
11109
/* Handle a "secure" attribute; arguments as in struct
   attribute_spec.handler.  */
11112
11113static tree
11114arc_handle_secure_attribute (tree *node ATTRIBUTE_UNUSED,
11115			  tree name, tree args, int,
11116			  bool *no_add_attrs)
11117{
11118  if (!TARGET_EM)
11119    {
11120      warning (OPT_Wattributes,
11121	       "%qE attribute only valid for ARC EM architecture",
11122	       name);
11123      *no_add_attrs = true;
11124    }
11125
11126  if (args == NULL_TREE)
11127    {
11128      warning (OPT_Wattributes,
11129	       "argument of %qE attribute is missing",
11130	       name);
11131      *no_add_attrs = true;
11132    }
11133  else
11134    {
11135      if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
11136	TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
11137      tree arg = TREE_VALUE (args);
11138      if (TREE_CODE (arg) != INTEGER_CST)
11139	{
11140	  warning (0, "%qE attribute allows only an integer constant argument",
11141		   name);
11142	  *no_add_attrs = true;
11143	}
11144    }
11145   return NULL_TREE;
11146}
11147
/* Return true if PAT is the symbol of a secure function.  */
11149
11150bool
11151arc_is_secure_call_p (rtx pat)
11152{
11153  tree attrs;
11154  tree decl = SYMBOL_REF_DECL (pat);
11155
11156  if (!decl)
11157    return false;
11158
11159  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
11160  if (lookup_attribute ("secure_call", attrs))
11161    return true;
11162
11163  return false;
11164}
11165
/* Handle the "uncached" qualifier.  */
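
/* A sketch of typical usage (illustrative only; the attribute attaches
   to a type):

     struct device_regs { unsigned ctrl; } __attribute__ ((uncached));

   Accesses through such a type bypass the data cache;
   arc_is_uncached_mem_p below detects them so that they are emitted
   with the .di flag.  */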
11167
11168static tree
11169arc_handle_uncached_attribute (tree *node,
11170			       tree name, tree args,
11171			       int flags ATTRIBUTE_UNUSED,
11172			       bool *no_add_attrs)
11173{
11174  if (DECL_P (*node) && TREE_CODE (*node) != TYPE_DECL)
11175    {
11176      error ("%qE attribute only applies to types",
11177	     name);
11178      *no_add_attrs = true;
11179    }
11180  else if (args)
11181    {
11182      warning (OPT_Wattributes, "argument of %qE attribute ignored", name);
11183    }
11184  return NULL_TREE;
11185}
11186
/* Return TRUE if PAT is a memory operand addressing uncached data.  */
11188
11189bool
11190arc_is_uncached_mem_p (rtx pat)
11191{
11192  tree attrs = NULL_TREE;
11193  tree addr;
11194
11195  if (!MEM_P (pat))
11196    return false;
11197
11198  /* Get the memory attributes.  */
11199  addr = MEM_EXPR (pat);
11200  if (!addr)
11201    return false;
11202
11203  /* Get the attributes.  */
11204  if (TREE_CODE (addr) == MEM_REF
11205      || TREE_CODE (addr) == VAR_DECL)
11206    {
11207      attrs = TYPE_ATTRIBUTES (TREE_TYPE (addr));
11208      if (lookup_attribute ("uncached", attrs))
11209	return true;
11210    }
11211  if (TREE_CODE (addr) == MEM_REF)
11212    {
11213      attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
11214      if (lookup_attribute ("uncached", attrs))
11215	return true;
11216      attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 1)));
11217      if (lookup_attribute ("uncached", attrs))
11218	return true;
11219    }
11220
11221  /* Check the definitions of the structs.  */
11222  while (handled_component_p (addr))
11223    {
11224      if (TREE_CODE (addr) == COMPONENT_REF)
11225	{
11226	  attrs = TYPE_ATTRIBUTES (TREE_TYPE (addr));
11227	  if (lookup_attribute ("uncached", attrs))
11228	    return true;
11229	  attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
11230	  if (lookup_attribute ("uncached", attrs))
11231	    return true;
11232	  attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 1)));
11233	  if (lookup_attribute ("uncached", attrs))
11234	    return true;
11235	}
11236      addr = TREE_OPERAND (addr, 0);
11237    }
11238  return false;
11239}
11240
/* Handle the aux attribute.  The auxiliary registers are addressed using
   the special instructions lr and sr.  The attribute 'aux' indicates that a
   variable refers to the aux-regs and which register number is
   desired.  */
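
/* A sketch of typical usage (illustrative only; the aux register
   number is an arbitrary example):

     volatile int mytimer __attribute__ ((aux (0x100)));

   Reads and writes of MYTIMER are then emitted as LR/SR accesses to
   auxiliary register 0x100 rather than as memory loads and stores.  */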
11245
11246static tree
11247arc_handle_aux_attribute (tree *node,
11248			  tree name, tree args, int,
11249			  bool *no_add_attrs)
11250{
11251  /* Isn't it better to use address spaces for the aux-regs?  */
11252  if (DECL_P (*node))
11253    {
11254      if (TREE_CODE (*node) != VAR_DECL)
11255	{
11256	  error ("%qE attribute only applies to variables",  name);
11257	  *no_add_attrs = true;
11258	}
11259      else if (args)
11260	{
11261	  if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
11262	    TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
11263	  tree arg = TREE_VALUE (args);
11264	  if (TREE_CODE (arg) != INTEGER_CST)
11265	    {
11266	      warning (OPT_Wattributes, "%qE attribute allows only an integer "
11267		       "constant argument", name);
11268	      *no_add_attrs = true;
11269	    }
11270	  /* FIXME! add range check.  TREE_INT_CST_LOW (arg) */
11271	}
11272
11273      if (TREE_CODE (*node) == VAR_DECL)
11274	{
11275	  tree fntype = TREE_TYPE (*node);
11276	  if (fntype && TREE_CODE (fntype) == POINTER_TYPE)
11277	    {
11278	      tree attrs = tree_cons (get_identifier ("aux"), NULL_TREE,
11279				      TYPE_ATTRIBUTES (fntype));
11280	      TYPE_ATTRIBUTES (fntype) = attrs;
11281	    }
11282	}
11283    }
11284  return NULL_TREE;
11285}
11286
/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P.  We don't want to use
   anchors for small data: the GP register acts as an anchor in that
   case.  We also don't want to use them for PC-relative accesses,
   where the PC acts as an anchor.  Also prohibit TLS symbols from
   using anchors.  */
11292
11293static bool
11294arc_use_anchors_for_symbol_p (const_rtx symbol)
11295{
11296  if (SYMBOL_REF_TLS_MODEL (symbol))
11297    return false;
11298
11299  if (flag_pic)
11300    return false;
11301
11302  if (SYMBOL_REF_SMALL_P (symbol))
11303    return false;
11304
11305  return default_use_anchors_for_symbol_p (symbol);
11306}
11307
11308/* Return true if SUBST can't safely replace its equivalent during RA.  */
11309static bool
11310arc_cannot_substitute_mem_equiv_p (rtx)
11311{
  /* If SUBST is mem[base+index], the address may not fit the ISA,
     thus return true.  */
11314  return true;
11315}
11316
/* Check whether the operands are valid for use in an LDD/STD
   instruction.  Assumes that RT and RT2 are REG.  This is guaranteed
   by the patterns.  Assumes that the address in the base register RN
   is word aligned.  The pattern guarantees that both memory accesses
   use the same base register, the offsets are constants within the
   range, and the gap between the offsets is 4.  If reload is complete,
   check that the registers are legal.  */
11324
11325static bool
11326operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)
11327{
11328  unsigned int t, t2;
11329
11330  if (!reload_completed)
11331    return true;
11332
11333  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),
11334			 (offset & (GET_MODE_SIZE (DImode) - 1) & 3
11335			  ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))
11336    return false;
11337
11338  t = REGNO (rt);
11339  t2 = REGNO (rt2);
11340
11341  if ((t2 == PCL_REG)
11342      || (t % 2 != 0)	/* First destination register is not even.  */
11343      || (t2 != t + 1))
11344      return false;
11345
11346  return true;
11347}
11348
11349/* Helper for gen_operands_ldd_std.  Returns true iff the memory
11350   operand MEM's address contains an immediate offset from the base
11351   register and has no side effects, in which case it sets BASE and
11352   OFFSET accordingly.  */
11353
11354static bool
11355mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
11356{
11357  rtx addr;
11358
11359  gcc_assert (base != NULL && offset != NULL);
11360
11361  /* TODO: Handle more general memory operand patterns, such as
11362     PRE_DEC and PRE_INC.  */
11363
11364  if (side_effects_p (mem))
11365    return false;
11366
11367  /* Can't deal with subregs.  */
11368  if (GET_CODE (mem) == SUBREG)
11369    return false;
11370
11371  gcc_assert (MEM_P (mem));
11372
11373  *offset = const0_rtx;
11374
11375  addr = XEXP (mem, 0);
11376
11377  /* If addr isn't valid for DImode, then we can't handle it.  */
11378  if (!arc_legitimate_address_p (DImode, addr,
11379				reload_in_progress || reload_completed))
11380    return false;
11381
11382  if (REG_P (addr))
11383    {
11384      *base = addr;
11385      return true;
11386    }
11387  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
11388    {
11389      *base = XEXP (addr, 0);
11390      *offset = XEXP (addr, 1);
11391      return (REG_P (*base) && CONST_INT_P (*offset));
11392    }
11393
11394  return false;
11395}
11396
11397/* Called from peephole2 to replace two word-size accesses with a
11398   single LDD/STD instruction.  Returns true iff we can generate a new
11399   instruction sequence.  That is, both accesses use the same base
11400   register and the gap between constant offsets is 4.  OPERANDS are
11401   the operands found by the peephole matcher; OPERANDS[0,1] are
11402   register operands, and OPERANDS[2,3] are the corresponding memory
11403   operands.  LOAD indicates whether the access is load or store.  */
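
/* For example, the two SImode loads

       ld r4,[r2,8]
       ld r5,[r2,12]

   use the same base register, their offsets differ by 4 and r4/r5 form
   an even/odd register pair, so they can be combined into a single
   64-bit load (ldd r4,[r2,8]).  When the higher-addressed access comes
   first in the stream, the operands are swapped so that the lower one
   is emitted first.  */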
11404
11405bool
11406gen_operands_ldd_std (rtx *operands, bool load, bool commute)
11407{
11408  int i, gap;
11409  HOST_WIDE_INT offsets[2], offset;
11410  int nops = 2;
11411  rtx cur_base, cur_offset, tmp;
11412  rtx base = NULL_RTX;
11413
11414  /* Check that the memory references are immediate offsets from the
11415     same base register.  Extract the base register, the destination
11416     registers, and the corresponding memory offsets.  */
11417  for (i = 0; i < nops; i++)
11418    {
11419      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
11420	return false;
11421
11422      if (i == 0)
11423	base = cur_base;
11424      else if (REGNO (base) != REGNO (cur_base))
11425	return false;
11426
11427      offsets[i] = INTVAL (cur_offset);
11428      if (GET_CODE (operands[i]) == SUBREG)
11429	{
11430	  tmp = SUBREG_REG (operands[i]);
11431	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
11432	  operands[i] = tmp;
11433	}
11434    }
11435
11436  /* Make sure there is no dependency between the individual loads.  */
11437  if (load && REGNO (operands[0]) == REGNO (base))
11438    return false; /* RAW.  */
11439
11440  if (load && REGNO (operands[0]) == REGNO (operands[1]))
11441    return false; /* WAW.  */
11442
11443  /* Make sure the instructions are ordered with lower memory access first.  */
11444  if (offsets[0] > offsets[1])
11445    {
11446      gap = offsets[0] - offsets[1];
11447      offset = offsets[1];
11448
11449      /* Swap the instructions such that lower memory is accessed first.  */
11450      std::swap (operands[0], operands[1]);
11451      std::swap (operands[2], operands[3]);
11452    }
11453  else
11454    {
11455      gap = offsets[1] - offsets[0];
11456      offset = offsets[0];
11457    }
11458
11459  /* Make sure accesses are to consecutive memory locations.  */
11460  if (gap != 4)
11461    return false;
11462
11463  /* Make sure we generate legal instructions.  */
11464  if (operands_ok_ldd_std (operands[0], operands[1], offset))
11465    return true;
11466
11467  if (load && commute)
11468    {
11469      /* Try reordering registers.  */
11470      std::swap (operands[0], operands[1]);
11471      if (operands_ok_ldd_std (operands[0], operands[1], offset))
11472	return true;
11473    }
11474
11475  return false;
11476}
11477
/* This order of allocation is used when we compile for size.  It
   allocates first the registers which are most likely to end up in
   a short instruction.  */
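
/* The first eight entries below are r0-r3 and r12-r15, the same compact
   register set preferred by arc_register_priority, since these are the
   registers that most 16-bit encodings can address.  */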
11481static const int size_alloc_order[] =
11482{
11483 0, 1, 2, 3, 12, 13, 14, 15,
11484 4, 5, 6, 7, 8, 9, 10, 11
11485};
11486
11487/* Adjust register allocation order when compiling for size.  */
11488void
11489arc_adjust_reg_alloc_order (void)
11490{
11491  const int arc_default_alloc_order[] = REG_ALLOC_ORDER;
11492  memcpy (reg_alloc_order, arc_default_alloc_order, sizeof (reg_alloc_order));
11493  if (optimize_size)
11494    memcpy (reg_alloc_order, size_alloc_order, sizeof (size_alloc_order));
11495}
11496
11497/* Implement TARGET_MEMORY_MOVE_COST.  */
11498
11499static int
11500arc_memory_move_cost (machine_mode mode,
11501		      reg_class_t rclass ATTRIBUTE_UNUSED,
11502		      bool in ATTRIBUTE_UNUSED)
11503{
11504  if ((GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
11505      || ((GET_MODE_SIZE (mode) <= UNITS_PER_WORD * 2) && TARGET_LL64))
11506    return 6;
11507
11508  return (2 * GET_MODE_SIZE (mode));
11509}
11510
/* Split an OR instruction into multiple BSET/OR instructions in an
   attempt to avoid long immediate constants.  The following strategies
   are employed when the destination is a 'q' register.

   1. If there are up to three bits set in the mask, a succession of up
   to three BSET instructions will be emitted:
   OR rA, rB, mask ->
   BSET(_S) rA,rB,mask1/BSET_S rA,rA,mask2/BSET_S rA,rA,mask3

   2. If some of the lower 6 bits of the mask are set and there is only
   one bit set in the upper remaining bits, then we will emit one OR and
   one BSET instruction:
   OR rA, rB, mask -> OR rA,rB,mask1/BSET_S rA,mask2

   3. Otherwise an OR with limm will be emitted.  */
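
/* For example, MASK = 0x00005001 (three bits set) is accepted by
   arc_check_ior_const below; the low six bits contribute 0x01, so the
   split emits an OR with 1 followed by BSET operations for bits 12 and
   14, i.e. three short instructions instead of an OR with a 32-bit
   limm.  */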
11526
11527void
11528arc_split_ior (rtx *operands)
11529{
11530  unsigned HOST_WIDE_INT mask, maskx;
11531  rtx op1 = operands[1];
11532
11533  gcc_assert (CONST_INT_P (operands[2]));
11534  mask =  INTVAL (operands[2]) & 0xffffffff;
11535
11536  if (__builtin_popcount (mask) > 3 || (mask & 0x3f))
11537    {
11538      maskx = mask & 0x3f;
11539      emit_insn (gen_rtx_SET (operands[0],
11540			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11541      op1 = operands[0];
11542      mask &= ~maskx;
11543    }
11544
11545  switch (__builtin_popcount (mask))
11546    {
11547    case 3:
11548      maskx = 1 << (__builtin_ffs (mask) - 1);
11549      emit_insn (gen_rtx_SET (operands[0],
11550			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11551      mask &= ~maskx;
11552      op1 = operands[0];
11553      /* FALLTHRU */
11554    case 2:
11555      maskx = 1 << (__builtin_ffs (mask) - 1);
11556      emit_insn (gen_rtx_SET (operands[0],
11557			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11558      mask &= ~maskx;
11559      op1 = operands[0];
11560      /* FALLTHRU */
11561    case 1:
11562      maskx = 1 << (__builtin_ffs (mask) - 1);
11563      emit_insn (gen_rtx_SET (operands[0],
11564			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11565      break;
11566    case 0:
11567      break;
11568    default:
11569      gcc_unreachable ();
11570    }
11571}
11572
11573/* Helper to check C0x constraint.  */
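/* For instance, 0x00005001 satisfies the constraint (three bits set),
   while 0x000000ff does not: it has eight bits set, two of which lie
   above the low six bits, so the plain OR-with-limm form is used
   instead.  */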
11574
11575bool
11576arc_check_ior_const (HOST_WIDE_INT ival)
11577{
11578  unsigned int mask = (unsigned int) (ival & 0xffffffff);
11579
11580  if (UNSIGNED_INT6 (ival)
11581      || IS_POWEROF2_P (mask))
11582    return false;
11583  if (__builtin_popcount (mask) <= 3)
11584    return true;
11585  if (__builtin_popcount (mask & ~0x3f) <= 1)
11586    return true;
11587  return false;
11588}
11589
/* Split a mov with a long immediate into smaller, size-friendly
   instructions.  */
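
/* A few worked examples of the strategies below, assuming ARCv2 with a
   barrel shifter:
   - 0x05000000 is handled by strategy 1: a rotate-right of 5 by 8.
   - 0x00001f00 is handled by strategy 2: 0x1f shifted left by 8.
   - 0x80000003 is handled by strategy 5: a rotate-right of 7 by 1,
     which moves bit 0 of the constant 7 into bit 31.
   - 0x0000ffff is handled by strategy 6: a move of -1 followed by an
     AND with 0xffff, which is expected to match a bit-mask pattern.  */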
11592
11593bool
11594arc_split_mov_const (rtx *operands)
11595{
11596  unsigned HOST_WIDE_INT ival;
11597  HOST_WIDE_INT shimm;
11598  machine_mode mode = GET_MODE (operands[0]);
11599
11600  /* Manage a constant.  */
11601  gcc_assert (CONST_INT_P (operands[1]));
11602  ival = INTVAL (operands[1]) & 0xffffffff;
11603
  /* 1. Check if we can just rotate the limm by 8 using ROR8.  */
11605  if (TARGET_BARREL_SHIFTER && TARGET_V2
11606      && ((ival & ~0x3f000000) == 0))
11607    {
11608      shimm = (ival >> 24) & 0x3f;
11609      emit_insn (gen_rtx_SET (operands[0],
11610			      gen_rtx_ROTATERT (mode, GEN_INT (shimm),
11611						GEN_INT (8))));
11612      return true;
11613    }
11614  /* 2. Check if we can just shift by 8 to fit into the u6 of LSL8.  */
11615  if (TARGET_BARREL_SHIFTER && TARGET_V2
11616      && ((ival & ~0x3f00) == 0))
11617    {
11618      shimm = (ival >> 8) & 0x3f;
11619      emit_insn (gen_rtx_SET (operands[0],
11620			      gen_rtx_ASHIFT (mode, GEN_INT (shimm),
11621					      GEN_INT (8))));
11622      return true;
11623    }
11624
11625  /* 3. Check if we can just shift by 16 to fit into the u6 of LSL16.  */
11626  if (TARGET_BARREL_SHIFTER && TARGET_V2
11627      && ((ival & ~0x3f0000) == 0))
11628    {
11629      shimm = (ival >> 16) & 0x3f;
11630      emit_insn (gen_rtx_SET (operands[0],
11631			      gen_rtx_ASHIFT (mode, GEN_INT (shimm),
11632					      GEN_INT (16))));
11633      return true;
11634    }
11635
11636  /* 4. Check if we can do something like mov_s h,u8 / asl_s ra,h,#nb.  */
11637  if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0
11638      && TARGET_BARREL_SHIFTER)
11639    {
11640      HOST_WIDE_INT shift = __builtin_ffs (ival);
11641      shimm = (ival >> (shift - 1)) & 0xff;
11642      emit_insn (gen_rtx_SET (operands[0], GEN_INT (shimm)));
11643      emit_insn (gen_rtx_SET (operands[0],
11644			      gen_rtx_ASHIFT (mode, operands[0],
11645					      GEN_INT (shift - 1))));
11646      return true;
11647    }
11648
11649  /* 5. Check if we can just rotate the limm, useful when no barrel
11650     shifter is present.  */
11651  if ((ival & ~0x8000001f) == 0)
11652    {
11653      shimm = (ival * 2 + 1) & 0x3f;
11654      emit_insn (gen_rtx_SET (operands[0],
11655			      gen_rtx_ROTATERT (mode, GEN_INT (shimm),
11656						const1_rtx)));
11657      return true;
11658    }
11659
11660  /* 6. Check if we can do something with bmask.  */
11661  if (IS_POWEROF2_P (ival + 1))
11662    {
11663      emit_insn (gen_rtx_SET (operands[0], constm1_rtx));
11664      emit_insn (gen_rtx_SET (operands[0],
11665			      gen_rtx_AND (mode, operands[0],
11666					   GEN_INT (ival))));
11667      return true;
11668    }
11669
11670  gcc_unreachable ();
11671}
11672
11673/* Helper to check Cax constraint.  */
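/* Any constant accepted here can be handled by one of the strategies
   in arc_split_mov_const above.  */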
11674
11675bool
11676arc_check_mov_const (HOST_WIDE_INT ival)
11677{
11678  ival = ival & 0xffffffff;
11679
11680  if (SIGNED_INT12 (ival))
11681    return false;
11682
11683  if ((ival & ~0x8000001f) == 0)
11684    return true;
11685
11686  if (IS_POWEROF2_P (ival + 1))
11687    return true;
11688
  /* The following rules require a barrel shifter.  */
11690  if (!TARGET_BARREL_SHIFTER)
11691    return false;
11692
11693  if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0)
11694    return true;
11695
11696  if ((ival & ~0x3f00) == 0)
11697    return true;
11698
11699  if ((ival & ~0x3f0000) == 0)
11700    return true;
11701
11702  if ((ival & ~0x3f000000) == 0)
11703    return true;
11704
11705  return false;
11706}
11707
11708/* Return nonzero if this function is known to have a null epilogue.
11709   This allows the optimizer to omit jumps to jumps if no stack
11710   was created.  */
11711
11712bool
11713arc_can_use_return_insn (void)
11714{
11715  return (reload_completed && cfun->machine->frame_info.total_size == 0
11716	  && !ARC_INTERRUPT_P (arc_compute_function_type (cfun)));
11717}
11718
11719/* Helper for INSN_COST.
11720
11721   Per Segher Boessenkool: rtx_costs computes the cost for any rtx (an
11722   insn, a set, a set source, any random piece of one).  set_src_cost,
11723   set_rtx_cost, etc. are helper functions that use that.
11724
11725   Those functions do not work for parallels.  Also, costs are not
11726   additive like this simplified model assumes.  Also, more complex
11727   backends tend to miss many cases in their rtx_costs function.
11728
11729   Many passes that want costs want to know the cost of a full insn.  Like
11730   combine.  That's why I created insn_cost: it solves all of the above
11731   problems.  */
11732
11733static int
11734arc_insn_cost (rtx_insn *insn, bool speed)
11735{
11736  int cost;
11737  if (recog_memoized (insn) < 0)
11738    return 0;
11739
11740  /* If optimizing for size, we want the insn size.  */
11741  if (!speed)
11742    return get_attr_length (insn);
11743
11744  /* Use cost if provided.  */
11745  cost = get_attr_cost (insn);
11746  if (cost > 0)
11747    return cost;
11748
  /* When optimizing for speed, use a simple cost model: a memory access
     is more expensive than any other instruction.  */
11751  enum attr_type type = get_attr_type (insn);
11752
11753  switch (type)
11754    {
11755    case TYPE_LOAD:
11756    case TYPE_STORE:
11757      cost = COSTS_N_INSNS (2);
11758      break;
11759
11760    default:
11761      cost = COSTS_N_INSNS (1);
11762      break;
11763    }
11764
11765  return cost;
11766}
11767
11768#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
11769#define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
11770
11771#undef TARGET_CONSTANT_ALIGNMENT
11772#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
11773
11774#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
11775#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P arc_cannot_substitute_mem_equiv_p
11776
11777#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11778#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc_asm_trampoline_template
11779
11780#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
11781#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
11782
11783#undef TARGET_REGISTER_MOVE_COST
11784#define TARGET_REGISTER_MOVE_COST arc_register_move_cost
11785
11786#undef TARGET_MEMORY_MOVE_COST
11787#define TARGET_MEMORY_MOVE_COST arc_memory_move_cost
11788
11789#undef  TARGET_INSN_COST
11790#define TARGET_INSN_COST arc_insn_cost
11791
11792struct gcc_target targetm = TARGET_INITIALIZER;
11793
11794#include "gt-arc.h"
11795