1/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
2   Copyright (C) 1994-2022 Free Software Foundation, Inc.
3
4   Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
5   behalf of Synopsys Inc.
6
   Position Independent Code support added, Code cleaned up,
   Comments and Support For ARC700 instructions added by
   Saurabh Verma (saurabh.verma@codito.com)
   Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
11
12   Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
13   profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>
14
15This file is part of GCC.
16
17GCC is free software; you can redistribute it and/or modify
18it under the terms of the GNU General Public License as published by
19the Free Software Foundation; either version 3, or (at your option)
20any later version.
21
22GCC is distributed in the hope that it will be useful,
23but WITHOUT ANY WARRANTY; without even the implied warranty of
24MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25GNU General Public License for more details.
26
27You should have received a copy of the GNU General Public License
28along with GCC; see the file COPYING3.  If not see
29<http://www.gnu.org/licenses/>.  */
30
31#define IN_TARGET_CODE 1
32
33#include "config.h"
34#include "system.h"
35#include "coretypes.h"
36#include "memmodel.h"
37#include "backend.h"
38#include "target.h"
39#include "rtl.h"
40#include "tree.h"
41#include "cfghooks.h"
42#include "df.h"
43#include "tm_p.h"
44#include "stringpool.h"
45#include "attribs.h"
46#include "optabs.h"
47#include "regs.h"
48#include "emit-rtl.h"
49#include "recog.h"
50#include "diagnostic.h"
51#include "fold-const.h"
52#include "varasm.h"
53#include "stor-layout.h"
54#include "calls.h"
55#include "output.h"
56#include "insn-attr.h"
57#include "flags.h"
58#include "explow.h"
59#include "expr.h"
60#include "langhooks.h"
61#include "tm-constrs.h"
62#include "reload.h" /* For operands_match_p */
63#include "cfgrtl.h"
64#include "tree-pass.h"
65#include "context.h"
66#include "builtins.h"
67#include "rtl-iter.h"
68#include "alias.h"
69#include "opts.h"
70#include "hw-doloop.h"
71
/* Which cpu we're compiling for (ARC600, ARC601, ARC700, ARCv2).  */
73static char arc_cpu_name[10] = "";
74static const char *arc_cpu_string = arc_cpu_name;
75
76typedef struct GTY (()) _arc_jli_section
77{
78  const char *name;
79  struct _arc_jli_section *next;
80} arc_jli_section;
81
82static arc_jli_section *arc_jli_sections = NULL;
83
/* Track which regs are set fixed/call saved/call used from the command line.  */
85HARD_REG_SET overrideregs;
86
87/* Maximum size of a loop.  */
88#define ARC_MAX_LOOP_LENGTH 4095
89
90/* Check if an rtx fits in the store instruction format.  Loads can
91   handle any constant.  */
92#define RTX_OK_FOR_OFFSET_P(MODE, X)					\
93  (GET_CODE (X) == CONST_INT						\
94   && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & (~0x03), \
95		       (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3	\
96			? 0						\
97			: -(-GET_MODE_SIZE (MODE) | (~0x03)) >> 1)))
98
99/* Array of valid operand punctuation characters.  */
100char arc_punct_chars[256];
101
102/* State used by arc_ccfsm_advance to implement conditional execution.  */
103struct GTY (()) arc_ccfsm
104{
105  int state;
106  int cc;
107  rtx cond;
108  rtx_insn *target_insn;
109  int target_label;
110};
111
112/* Status of the IRQ_CTRL_AUX register.  */
113typedef struct irq_ctrl_saved_t
114{
115  /* Last register number used by IRQ_CTRL_SAVED aux_reg.  */
116  short irq_save_last_reg;
117  /* True if BLINK is automatically saved.  */
118  bool  irq_save_blink;
119  /* True if LPCOUNT is automatically saved.  */
120  bool  irq_save_lpcount;
121} irq_ctrl_saved_t;
122static irq_ctrl_saved_t irq_ctrl_saved;
123
124#define ARC_AUTOBLINK_IRQ_P(FNTYPE)				\
125  ((ARC_INTERRUPT_P (FNTYPE)					\
126    && irq_ctrl_saved.irq_save_blink)				\
127   || (ARC_FAST_INTERRUPT_P (FNTYPE)				\
128       && rgf_banked_register_count > 8))
129
130#define ARC_AUTOFP_IRQ_P(FNTYPE)				\
131  ((ARC_INTERRUPT_P (FNTYPE)					\
132    && (irq_ctrl_saved.irq_save_last_reg > 26))			\
133  || (ARC_FAST_INTERRUPT_P (FNTYPE)				\
134      && rgf_banked_register_count > 8))
135
136#define ARC_AUTO_IRQ_P(FNTYPE)					\
137  (ARC_INTERRUPT_P (FNTYPE) && !ARC_FAST_INTERRUPT_P (FNTYPE)	\
138   && (irq_ctrl_saved.irq_save_blink				\
139       || (irq_ctrl_saved.irq_save_last_reg >= 0)))
140
141/* Number of registers in second bank for FIRQ support.  */
142static int rgf_banked_register_count;
143
144#define arc_ccfsm_current cfun->machine->ccfsm_current
145
146#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
147  ((STATE)->state == 1 || (STATE)->state == 2)
148
149/* Indicate we're conditionalizing insns now.  */
150#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
151  ((STATE)->state += 2)
152
153#define ARC_CCFSM_COND_EXEC_P(STATE) \
154  ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
155   || current_insn_predicate)
156
157/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE.  */
158#define CCFSM_ISCOMPACT(INSN,STATE) \
159  (ARC_CCFSM_COND_EXEC_P (STATE) \
160   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
161      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
162   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
163
164/* Likewise, but also consider that INSN might be in a delay slot of JUMP.  */
165#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
166  ((ARC_CCFSM_COND_EXEC_P (STATE) \
167    || (JUMP_P (JUMP) \
168	&& INSN_ANNULLED_BRANCH_P (JUMP) \
169	&& (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
170   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
171      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
172   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
173
174/* Start enter/leave register range.  */
175#define ENTER_LEAVE_START_REG 13
176
177/* End enter/leave register range.  */
178#define ENTER_LEAVE_END_REG 26
179
180/* The maximum number of insns skipped which will be conditionalised if
181   possible.  */
182/* When optimizing for speed:
183    Let p be the probability that the potentially skipped insns need to
184    be executed, pn the cost of a correctly predicted non-taken branch,
185    mt the cost of a mis/non-predicted taken branch,
186    mn mispredicted non-taken, pt correctly predicted taken ;
187    costs expressed in numbers of instructions like the ones considered
188    skipping.
189    Unfortunately we don't have a measure of predictability - this
190    is linked to probability only in that in the no-eviction-scenario
191    there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
192    value that can be assumed *if* the distribution is perfectly random.
    A predictability of 1 is perfectly plausible no matter what p is,
194    because the decision could be dependent on an invocation parameter
195    of the program.
196    For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
197    For small p, we want MAX_INSNS_SKIPPED == pt
198
199   When optimizing for size:
200    We want to skip insn unless we could use 16 opcodes for the
201    non-conditionalized insn to balance the branch length or more.
    Performance can be the tie-breaker.  */
/* If the potentially-skipped insns are likely to be executed, we'll
   generally save one non-taken branch; we want this saving to be no
   less than 1/p.  */
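/* Illustration with made-up numbers (an assumption, not a measurement):
   taking pn = 1, mt = 4 and p = 0.5, the speed formula above gives
   pn/(1-p) + mt - pn = 1/0.5 + 4 - 1 = 5, while for small p the limit
   would simply be pt.  The single value of MAX_INSNS_SKIPPED below is
   necessarily a compromise between such estimates and code size.  */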
207#define MAX_INSNS_SKIPPED 3
208
209/* ZOL control registers.  */
210#define AUX_LP_START 0x02
211#define AUX_LP_END 0x03
212
213/* FPX AUX registers.  */
214#define AUX_DPFP_START 0x301
215
216/* ARC600 MULHI register.  */
217#define AUX_MULHI 0x12
218
219/* A nop is needed between a 4 byte insn that sets the condition codes and
220   a branch that uses them (the same isn't true for an 8 byte insn that sets
221   the condition codes).  Set by arc_ccfsm_advance.  Used by
222   arc_print_operand.  */
223
224static int get_arc_condition_code (rtx);
225
226static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
227static tree arc_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
228static tree arc_handle_jli_attribute (tree *, tree, tree, int, bool *);
229static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
230static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *);
231static tree arc_handle_aux_attribute (tree *, tree, tree, int, bool *);
232
/* Table of machine-specific attributes supported by the ARC back end.  */
235const struct attribute_spec arc_attribute_table[] =
236{
237 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
238      affects_type_identity, handler, exclude } */
239  { "interrupt", 1, 1, true, false, false, true,
240    arc_handle_interrupt_attribute, NULL },
241  /* Function calls made to this symbol must be done indirectly, because
242     it may lie outside of the 21/25 bit addressing range of a normal function
243     call.  */
244  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
245  /* Whereas these functions are always known to reside within the 25 bit
246     addressing range of unconditionalized bl.  */
247  { "medium_call",   0, 0, false, true,  true, false, NULL, NULL },
248  /* And these functions are always known to reside within the 21 bit
249     addressing range of blcc.  */
250  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Functions for which the compiler does not generate a prologue and
     epilogue.  */
253  { "naked", 0, 0, true, false, false,  false, arc_handle_fndecl_attribute,
254    NULL },
  /* Function calls made using the jli instruction.  The pointer into the
     JLI table is found later.  */
257  { "jli_always",    0, 0, false, true,  true, false,  NULL, NULL },
  /* Function calls made using the jli instruction.  The pointer into the
     JLI table is given as an input parameter.  */
260  { "jli_fixed",    1, 1, false, true,  true, false, arc_handle_jli_attribute,
261    NULL },
262  /* Call a function using secure-mode.  */
263  { "secure_call",  1, 1, false, true, true, false, arc_handle_secure_attribute,
264    NULL },
265   /* Bypass caches using .di flag.  */
266  { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute,
267    NULL },
268  { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL },
269  { NULL, 0, 0, false, false, false, false, NULL, NULL }
270};
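
/* Purely illustrative (the attribute arguments shown are hypothetical
   and not prescribed by the table above): user code might apply some of
   these attributes as

     void __attribute__ ((interrupt ("ilink"))) timer_isr (void);
     void __attribute__ ((naked)) boot_stub (void);
     int  __attribute__ ((jli_fixed (4))) fast_helper (int);
     volatile int status __attribute__ ((aux (0x1000)));  */
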
271static int arc_comp_type_attributes (const_tree, const_tree);
272static void arc_file_start (void);
273static void arc_internal_label (FILE *, const char *, unsigned long);
274static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
275				 tree);
276static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
277static void arc_encode_section_info (tree decl, rtx rtl, int first);
278
279static void arc_init_builtins (void);
280static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);
281
282static int branch_dest (rtx);
283
284static void  arc_output_pic_addr_const (FILE *,  rtx, int);
285static bool arc_function_ok_for_sibcall (tree, tree);
286static rtx arc_function_value (const_tree, const_tree, bool);
287const char * output_shift (rtx *);
288static void arc_reorg (void);
289static bool arc_in_small_data_p (const_tree);
290
291static void arc_init_reg_tables (void);
292static bool arc_return_in_memory (const_tree, const_tree);
293static bool arc_vector_mode_supported_p (machine_mode);
294
295static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
296				  unsigned int, bool);
297static const char *arc_invalid_within_doloop (const rtx_insn *);
298
299static void output_short_suffix (FILE *file);
300
301static bool arc_frame_pointer_required (void);
302
303static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
304						unsigned int,
305						enum by_pieces_operation op,
306						bool);
307
308/* Globally visible information about currently selected cpu.  */
309const arc_cpu_t *arc_selected_cpu;
310
311/* Traditionally, we push saved registers first in the prologue,
312   then we allocate the rest of the frame - and reverse in the epilogue.
   This still has its merits for ease of debugging, or saving code size
314   or even execution time if the stack frame is so large that some accesses
315   can't be encoded anymore with offsets in the instruction code when using
316   a different scheme.
317   Also, it would be a good starting point if we got instructions to help
318   with register save/restore.
319
320   However, often stack frames are small, and the pushing / popping has
321   some costs:
322   - the stack modification prevents a lot of scheduling.
323   - frame allocation / deallocation may need extra instructions.
   - we need to place a memory barrier after frame allocation to prevent
     the delay slot scheduler from rescheduling frame-related insns and
     messing up dwarf unwinding.  The barrier before deallocation
327     is for flushing all pending sp operations.
328
329   Thus, for small frames, we'd like to use a different scheme:
330   - The frame is allocated in full with the first prologue instruction,
331     and deallocated in full with the last epilogue instruction.
332     Thus, the instructions in-between can be freely scheduled.
333   - If the function has no outgoing arguments on the stack, we can allocate
334     one register save slot at the top of the stack.  This register can then
335     be saved simultaneously with frame allocation, and restored with
336     frame deallocation.
337     This register can be picked depending on scheduling considerations,
     although some thought should go into having some set of registers
339     to be potentially lingering after a call, and others to be available
     immediately - i.e. in the absence of interprocedural optimization, we
341     can use an ABI-like convention for register allocation to reduce
342     stalls after function return.  */
343
344/* ARCompact stack frames look like:
345
346           Before call                     After call
347  high  +-----------------------+       +-----------------------+
348  mem   |  reg parm save area   |       | reg parm save area    |
349        |  only created for     |       | only created for      |
350        |  variable arg fns     |       | variable arg fns      |
351    AP  +-----------------------+       +-----------------------+
352        |  return addr register |       | return addr register  |
353        |  (if required)        |       | (if required)         |
354        +-----------------------+       +-----------------------+
355        |                       |       |                       |
356        |  reg save area        |       | reg save area         |
357        |                       |       |                       |
358        +-----------------------+       +-----------------------+
359        |  frame pointer        |       | frame pointer         |
360        |  (if required)        |       | (if required)         |
361    FP  +-----------------------+       +-----------------------+
362        |                       |       |                       |
363        |  local/temp variables |       | local/temp variables  |
364        |                       |       |                       |
365        +-----------------------+       +-----------------------+
366        |                       |       |                       |
367        |  arguments on stack   |       | arguments on stack    |
368        |                       |       |                       |
369    SP  +-----------------------+       +-----------------------+
370                                        | reg parm save area    |
371                                        | only created for      |
372                                        | variable arg fns      |
373                                    AP  +-----------------------+
374                                        | return addr register  |
375                                        | (if required)         |
376                                        +-----------------------+
377                                        |                       |
378                                        | reg save area         |
379                                        |                       |
380                                        +-----------------------+
381                                        | frame pointer         |
382                                        | (if required)         |
383                                    FP  +-----------------------+
384                                        |                       |
385                                        | local/temp variables  |
386                                        |                       |
387                                        +-----------------------+
388                                        |                       |
389                                        | arguments on stack    |
390  low                                   |                       |
391  mem                               SP  +-----------------------+
392
393Notes:
1) The "reg parm save area" does not exist for non-variable-argument fns.
395   The "reg parm save area" can be eliminated completely if we created our
396   own va-arc.h, but that has tradeoffs as well (so it's not done).  */
397
398/* Structure to be filled in by arc_compute_frame_size with register
399   save masks, and offsets for the current function.  */
400struct GTY (()) arc_frame_info
401{
402  unsigned int total_size;	/* # bytes that the entire frame takes up.  */
403  unsigned int extra_size;	/* # bytes of extra stuff.  */
404  unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
405  unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
406  unsigned int reg_size;	/* # bytes needed to store regs.  */
407  unsigned int var_size;	/* # bytes that variables take up.  */
408  uint64_t gmask;		/* Mask of saved gp registers.  */
  bool initialized; /* TRUE if frame size already calculated.  */
410  short millicode_start_reg;
411  short millicode_end_reg;
412  bool save_return_addr;
413};
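
/* Illustration only (made-up numbers): a function with 16 bytes of
   locals, two call-saved registers to save and no outgoing stack
   arguments would end up with var_size == 16, reg_size == 8,
   args_size == 0, two bits set in gmask, and total_size covering those
   plus extra_size and pretend_size.  */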
414
415/* GMASK bit length -1.  */
416#define GMASK_LEN 63
417
418/* Defining data structures for per-function information.  */
419
420typedef struct GTY (()) machine_function
421{
422  unsigned int fn_type;
423  struct arc_frame_info frame_info;
424  /* To keep track of unalignment caused by short insns.  */
425  int unalign;
426  struct arc_ccfsm ccfsm_current;
427  /* Map from uid to ccfsm state during branch shortening.  */
428  rtx ccfsm_current_insn;
429  char arc_reorg_started;
430  char prescan_initialized;
431} machine_function;
432
433
/* Given a symbol RTX (const (symb <+ const_int>)), return its
   alignment.  */
436
437static int
438get_symbol_alignment (rtx x)
439{
440  tree decl = NULL_TREE;
441  int align = 0;
442
443  switch (GET_CODE (x))
444    {
445    case SYMBOL_REF:
446      decl = SYMBOL_REF_DECL (x);
447      break;
448    case CONST:
449      return get_symbol_alignment (XEXP (x, 0));
450    case PLUS:
451      gcc_assert (CONST_INT_P (XEXP (x, 1)));
452      return get_symbol_alignment (XEXP (x, 0));
453    default:
454      return 0;
455    }
456
457  if (decl)
458    align = DECL_ALIGN (decl);
459  align = align / BITS_PER_UNIT;
460  return align;
461}
462
463/* Return true if x is ok to be used as a small data address.  */
464
465static bool
466legitimate_small_data_address_p (rtx x, machine_mode mode)
467{
468  switch (GET_CODE (x))
469    {
470    case CONST:
471      return legitimate_small_data_address_p (XEXP (x, 0), mode);
472    case SYMBOL_REF:
473      return SYMBOL_REF_SMALL_P (x);
474    case PLUS:
475      {
476	bool p0 = (GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
477	  && SYMBOL_REF_SMALL_P (XEXP (x, 0));
478
479	/* If no constant then we cannot do small data.  */
480	if (!CONST_INT_P (XEXP (x, 1)))
481	  return false;
482
	/* Small data relocs work with scaled addresses; check if
	   the immediate fits the requirements.  */
485	switch (GET_MODE_SIZE (mode))
486	  {
487	  case 1:
488	    return p0;
489	  case 2:
490	    return p0 && ((INTVAL (XEXP (x, 1)) & 0x1) == 0);
491	  case 4:
492	  case 8:
493	    return p0 && ((INTVAL (XEXP (x, 1)) & 0x3) == 0);
494	  default:
495	    return false;
496	  }
497      }
498    default:
499      return false;
500    }
501}
502
/* TRUE if OP is a scaled address.  */
504static bool
505legitimate_scaled_address_p (machine_mode mode, rtx op, bool strict)
506{
507  if (GET_CODE (op) != PLUS)
508    return false;
509
510  if (GET_CODE (XEXP (op, 0)) != MULT)
511    return false;
512
513  /* Check multiplication operands.  */
514  if (!RTX_OK_FOR_INDEX_P (XEXP (XEXP (op, 0), 0), strict))
515    return false;
516
517  if (!CONST_INT_P (XEXP (XEXP (op, 0), 1)))
518    return false;
519
520  switch (GET_MODE_SIZE (mode))
521    {
522    case 2:
523      if (INTVAL (XEXP (XEXP (op, 0), 1)) != 2)
524	return false;
525      break;
526    case 8:
527      if (!TARGET_LL64)
528	return false;
529      /*  Fall through. */
530    case 4:
531      if (INTVAL (XEXP (XEXP (op, 0), 1)) != 4)
532	return false;
      break;
534    default:
535      return false;
536    }
537
538  /* Check the base.  */
539  if (RTX_OK_FOR_BASE_P (XEXP (op, 1), (strict)))
540    return true;
541
542  if (flag_pic)
543    {
544      if (CONST_INT_P (XEXP (op, 1)))
545	return true;
546      return false;
547    }
548
  /* Scaled addresses for sdata are handled elsewhere.  */
550  if (legitimate_small_data_address_p (op, mode))
551    return false;
552
553  if (CONSTANT_P (XEXP (op, 1)))
554      return true;
555
556  return false;
557}
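
/* Illustration (an assumed example, not taken from the hardware manual):
   the RTL shape accepted above for a 16-bit access is
     (plus (mult (reg Rx) (const_int 2)) (reg Rb))
   i.e. an index register scaled by the access size plus a base register,
   matching the scaled-index (".as") addressing forms.  */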
558
559/* Check for constructions like REG + OFFS, where OFFS can be a
   register, an immediate or a long immediate.  */
561
562static bool
563legitimate_offset_address_p (machine_mode mode, rtx x, bool index, bool strict)
564{
565  if (GET_CODE (x) != PLUS)
566    return false;
567
568  if (!RTX_OK_FOR_BASE_P (XEXP (x, 0), (strict)))
569    return false;
570
571  /* Check for: [Rx + small offset] or [Rx + Ry].  */
572  if (((index && RTX_OK_FOR_INDEX_P (XEXP (x, 1), (strict))
573	&& GET_MODE_SIZE ((mode)) <= 4)
574       || RTX_OK_FOR_OFFSET_P (mode, XEXP (x, 1))))
575    return true;
576
577  /* Check for [Rx + symbol].  */
578  if (!flag_pic
579      && (GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
580      /* Avoid this type of address for double or larger modes.  */
581      && (GET_MODE_SIZE (mode) <= 4)
582      /* Avoid small data which ends in something like GP +
583	 symb@sda.  */
584      && (!SYMBOL_REF_SMALL_P (XEXP (x, 1))))
585    return true;
586
587  return false;
588}
589
590/* Implements target hook vector_mode_supported_p.  */
591
592static bool
593arc_vector_mode_supported_p (machine_mode mode)
594{
595  switch (mode)
596    {
597    case E_V2HImode:
598      return TARGET_PLUS_DMPY;
599    case E_V4HImode:
600    case E_V2SImode:
601      return TARGET_PLUS_QMACW;
602    case E_V4SImode:
603    case E_V8HImode:
604      return TARGET_SIMD_SET;
605
606    default:
607      return false;
608    }
609}
610
611/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
612
613static machine_mode
614arc_preferred_simd_mode (scalar_mode mode)
615{
616  switch (mode)
617    {
618    case E_HImode:
619      return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
620    case E_SImode:
621      return V2SImode;
622
623    default:
624      return word_mode;
625    }
626}
627
628/* Implements target hook
629   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */
630
631static unsigned int
632arc_autovectorize_vector_modes (vector_modes *modes, bool)
633{
634  if (TARGET_PLUS_QMACW)
635    {
636      modes->quick_push (V4HImode);
637      modes->quick_push (V2HImode);
638    }
639  return 0;
640}
641
642
643/* Implements target hook TARGET_SCHED_ISSUE_RATE.  */
644static int
645arc_sched_issue_rate (void)
646{
647  switch (arc_tune)
648    {
649    case TUNE_ARCHS4X:
650    case TUNE_ARCHS4XD:
651      return 3;
652    default:
653      break;
654    }
655  return 1;
656}
657
658/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
659static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
660static rtx arc_delegitimize_address (rtx);
661static bool arc_can_follow_jump (const rtx_insn *follower,
662				 const rtx_insn *followee);
663
664static rtx frame_insn (rtx);
665static void arc_function_arg_advance (cumulative_args_t,
666				      const function_arg_info &);
667static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
668
669/* initialize the GCC target structure.  */
670#undef  TARGET_COMP_TYPE_ATTRIBUTES
671#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
672#undef TARGET_ASM_FILE_START
673#define TARGET_ASM_FILE_START arc_file_start
674#undef TARGET_ATTRIBUTE_TABLE
675#define TARGET_ATTRIBUTE_TABLE arc_attribute_table
676#undef TARGET_ASM_INTERNAL_LABEL
677#define TARGET_ASM_INTERNAL_LABEL arc_internal_label
678#undef TARGET_RTX_COSTS
679#define TARGET_RTX_COSTS arc_rtx_costs
680#undef TARGET_ADDRESS_COST
681#define TARGET_ADDRESS_COST arc_address_cost
682
683#undef TARGET_ENCODE_SECTION_INFO
684#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
685
686#undef TARGET_CANNOT_FORCE_CONST_MEM
687#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
688
689#undef  TARGET_INIT_BUILTINS
690#define TARGET_INIT_BUILTINS  arc_init_builtins
691
692#undef  TARGET_EXPAND_BUILTIN
693#define TARGET_EXPAND_BUILTIN arc_expand_builtin
694
695#undef  TARGET_BUILTIN_DECL
696#define TARGET_BUILTIN_DECL arc_builtin_decl
697
698#undef  TARGET_ASM_OUTPUT_MI_THUNK
699#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
700
701#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
702#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
703
704#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
705#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
706
707#undef  TARGET_MACHINE_DEPENDENT_REORG
708#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
709
710#undef TARGET_IN_SMALL_DATA_P
711#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
712
713#undef TARGET_PROMOTE_FUNCTION_MODE
714#define TARGET_PROMOTE_FUNCTION_MODE \
715  default_promote_function_mode_always_promote
716
717#undef TARGET_PROMOTE_PROTOTYPES
718#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
719
720#undef TARGET_RETURN_IN_MEMORY
721#define TARGET_RETURN_IN_MEMORY arc_return_in_memory
722#undef TARGET_PASS_BY_REFERENCE
723#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
724
725#undef TARGET_SETUP_INCOMING_VARARGS
726#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
727
728#undef TARGET_ARG_PARTIAL_BYTES
729#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
730
731#undef TARGET_MUST_PASS_IN_STACK
732#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
733
734#undef TARGET_FUNCTION_VALUE
735#define TARGET_FUNCTION_VALUE arc_function_value
736
737#undef  TARGET_SCHED_ADJUST_PRIORITY
738#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
739
740#undef TARGET_SCHED_ISSUE_RATE
741#define TARGET_SCHED_ISSUE_RATE arc_sched_issue_rate
742
743#undef TARGET_VECTOR_MODE_SUPPORTED_P
744#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
745
746#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
747#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
748
749#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
750#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES arc_autovectorize_vector_modes
751
752#undef TARGET_CAN_USE_DOLOOP_P
753#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
754
755#undef TARGET_INVALID_WITHIN_DOLOOP
756#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
757
758#undef TARGET_PRESERVE_RELOAD_P
759#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
760
761#undef TARGET_CAN_FOLLOW_JUMP
762#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
763
764#undef TARGET_DELEGITIMIZE_ADDRESS
765#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
766
767#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
768#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
769  arc_use_by_pieces_infrastructure_p
770
771/* Usually, we will be able to scale anchor offsets.
772   When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
773#undef TARGET_MIN_ANCHOR_OFFSET
774#define TARGET_MIN_ANCHOR_OFFSET (-1024)
775#undef TARGET_MAX_ANCHOR_OFFSET
776#define TARGET_MAX_ANCHOR_OFFSET (1020)
777
778#undef TARGET_SECONDARY_RELOAD
779#define TARGET_SECONDARY_RELOAD arc_secondary_reload
780
781#define TARGET_OPTION_OVERRIDE arc_override_options
782
783#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
784
785#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
786
787#define TARGET_CAN_ELIMINATE arc_can_eliminate
788
789#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
790
791#define TARGET_FUNCTION_ARG arc_function_arg
792
793#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
794
795#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p
796
797#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p
798
799#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p
800
801#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
802
803#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
804#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P	\
805  arc_no_speculation_in_delay_slots_p
806
807#undef TARGET_LRA_P
808#define TARGET_LRA_P arc_lra_p
809#define TARGET_REGISTER_PRIORITY arc_register_priority
810/* Stores with scaled offsets have different displacement ranges.  */
811#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
812#define TARGET_SPILL_CLASS arc_spill_class
813
814#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
815#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arc_allocate_stack_slots_for_args
816
817#undef TARGET_WARN_FUNC_RETURN
818#define TARGET_WARN_FUNC_RETURN arc_warn_func_return
819
820#include "target-def.h"
821
822#undef TARGET_ASM_ALIGNED_HI_OP
823#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
824#undef TARGET_ASM_ALIGNED_SI_OP
825#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
826
827#ifdef HAVE_AS_TLS
828#undef TARGET_HAVE_TLS
829#define TARGET_HAVE_TLS HAVE_AS_TLS
830#endif
831
832#undef TARGET_DWARF_REGISTER_SPAN
833#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
834
835#undef TARGET_HARD_REGNO_NREGS
836#define TARGET_HARD_REGNO_NREGS arc_hard_regno_nregs
837#undef TARGET_HARD_REGNO_MODE_OK
838#define TARGET_HARD_REGNO_MODE_OK arc_hard_regno_mode_ok
839
840#undef TARGET_MODES_TIEABLE_P
841#define TARGET_MODES_TIEABLE_P arc_modes_tieable_p
842
843/* Try to keep the (mov:DF _, reg) as early as possible so
844   that the d<add/sub/mul>h-lr insns appear together and can
845   use the peephole2 pattern.  */
846
847static int
848arc_sched_adjust_priority (rtx_insn *insn, int priority)
849{
850  rtx set = single_set (insn);
851  if (set
852      && GET_MODE (SET_SRC(set)) == DFmode
853      && GET_CODE (SET_SRC(set)) == REG)
854    {
855      /* Incrementing priority by 20 (empirically derived).  */
856      return priority + 20;
857    }
858
859  return priority;
860}
861
862/* For ARC base register + offset addressing, the validity of the
863   address is mode-dependent for most of the offset range, as the
864   offset can be scaled by the access size.
865   We don't expose these as mode-dependent addresses in the
866   mode_dependent_address_p target hook, because that would disable
867   lots of optimizations, and most uses of these addresses are for 32
868   or 64 bit accesses anyways, which are fine.
869   However, that leaves some addresses for 8 / 16 bit values not
870   properly reloaded by the generic code, which is why we have to
871   schedule secondary reloads for these.  */
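
/* Illustration (assumed example): a QImode or HImode spill slot at
   something like [fp + 300] may be out of range for the short
   byte/halfword offset forms even though a 32-bit access at the same
   offset is fine, so the hook below requests the
   reload_{qi,hi}_{load,store} patterns, which rewrite the access
   through a scratch register (see arc_secondary_reload_conv).  */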
872
873static reg_class_t
874arc_secondary_reload (bool in_p,
875		      rtx x,
876		      reg_class_t cl,
877		      machine_mode mode,
878		      secondary_reload_info *sri)
879{
880  enum rtx_code code = GET_CODE (x);
881
882  if (cl == DOUBLE_REGS)
883    return GENERAL_REGS;
884
885 /* If we have a subreg (reg), where reg is a pseudo (that will end in
886    a memory location), then we may need a scratch register to handle
887    the fp/sp+largeoffset address.  */
888  if (code == SUBREG)
889    {
890      rtx addr = NULL_RTX;
891      x = SUBREG_REG (x);
892
893      if (REG_P (x))
894	{
895	  int regno = REGNO (x);
896	  if (regno >= FIRST_PSEUDO_REGISTER)
897	    regno = reg_renumber[regno];
898
899	  if (regno != -1)
900	    return NO_REGS;
901
902	  /* It is a pseudo that ends in a stack location.  This
903	     procedure only works with the old reload step.  */
904	  if (!lra_in_progress && reg_equiv_mem (REGNO (x)))
905	    {
906	      /* Get the equivalent address and check the range of the
907		 offset.  */
908	      rtx mem = reg_equiv_mem (REGNO (x));
909	      addr = find_replacement (&XEXP (mem, 0));
910	    }
911	}
912      else
913	{
914	  gcc_assert (MEM_P (x));
915	  addr = XEXP (x, 0);
916	  addr = simplify_rtx (addr);
917	}
918      if (addr && GET_CODE (addr) == PLUS
919	  && CONST_INT_P (XEXP (addr, 1))
920	  && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1))))
921	{
922	  switch (mode)
923	    {
924	    case E_QImode:
925	      sri->icode =
926		in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store;
927	      break;
928	    case E_HImode:
929	      sri->icode =
930		in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store;
931	      break;
932	    default:
933	      break;
934	    }
935	}
936    }
937  return NO_REGS;
938}
939
940/* Convert reloads using offsets that are too large to use indirect
941   addressing.  */
942
943void
944arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p)
945{
946  rtx addr;
947
948  gcc_assert (GET_CODE (mem) == MEM);
949  addr = XEXP (mem, 0);
950
  /* Large offset: use a move.  FIXME: ld ops accept limms as
952     offsets.  Hence, the following move insn is not required.  */
953  emit_move_insn (scratch, addr);
954  mem = replace_equiv_address_nv (mem, scratch);
955
956  /* Now create the move.  */
957  if (store_p)
958    emit_insn (gen_rtx_SET (mem, reg));
959  else
960    emit_insn (gen_rtx_SET (reg, mem));
961
962  return;
963}
964
965static unsigned arc_ifcvt (void);
966
967namespace {
968
969const pass_data pass_data_arc_ifcvt =
970{
971  RTL_PASS,
972  "arc_ifcvt",				/* name */
973  OPTGROUP_NONE,			/* optinfo_flags */
974  TV_IFCVT2,				/* tv_id */
975  0,					/* properties_required */
976  0,					/* properties_provided */
977  0,					/* properties_destroyed */
978  0,					/* todo_flags_start */
979  TODO_df_finish			/* todo_flags_finish */
980};
981
982class pass_arc_ifcvt : public rtl_opt_pass
983{
984 public:
985 pass_arc_ifcvt (gcc::context *ctxt)
986   : rtl_opt_pass (pass_data_arc_ifcvt, ctxt)
987    {}
988
989  /* opt_pass methods: */
990  opt_pass * clone ()
991    {
992      return new pass_arc_ifcvt (m_ctxt);
993    }
994  virtual unsigned int execute (function *)
995  {
996    return arc_ifcvt ();
997  }
998  virtual bool gate (function *)
999  {
1000    return (optimize > 1 && !TARGET_NO_COND_EXEC);
1001  }
1002};
1003
1004} // anon namespace
1005
1006rtl_opt_pass *
1007make_pass_arc_ifcvt (gcc::context *ctxt)
1008{
1009  return new pass_arc_ifcvt (ctxt);
1010}
1011
1012static unsigned arc_predicate_delay_insns (void);
1013
1014namespace {
1015
1016const pass_data pass_data_arc_predicate_delay_insns =
1017{
1018  RTL_PASS,
1019  "arc_predicate_delay_insns",		/* name */
1020  OPTGROUP_NONE,			/* optinfo_flags */
1021  TV_IFCVT2,				/* tv_id */
1022  0,					/* properties_required */
1023  0,					/* properties_provided */
1024  0,					/* properties_destroyed */
1025  0,					/* todo_flags_start */
1026  TODO_df_finish			/* todo_flags_finish */
1027};
1028
1029class pass_arc_predicate_delay_insns : public rtl_opt_pass
1030{
1031 public:
1032 pass_arc_predicate_delay_insns(gcc::context *ctxt)
1033   : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt)
1034    {}
1035
1036  /* opt_pass methods: */
1037  virtual unsigned int execute (function *)
1038  {
1039    return arc_predicate_delay_insns ();
1040  }
1041  virtual bool gate (function *)
1042  {
1043    return flag_delayed_branch;
1044  }
1045};
1046
1047} // anon namespace
1048
1049rtl_opt_pass *
1050make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
1051{
1052  return new pass_arc_predicate_delay_insns (ctxt);
1053}
1054
1055/* Called by OVERRIDE_OPTIONS to initialize various things.  */
1056
1057static void
1058arc_init (void)
1059{
1060  if (TARGET_V2)
1061    {
      /* If we have the multiplier, then use it.  */
1063      if (TARGET_MPYW || TARGET_MULTI)
1064	  arc_multcost = COSTS_N_INSNS (1);
1065    }
1066  /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
1067  if (arc_multcost < 0)
1068    switch (arc_tune)
1069      {
1070      case ARC_TUNE_ARC700_4_2_STD:
1071	/* latency 7;
1072	   max throughput (1 multiply + 4 other insns) / 5 cycles.  */
1073	arc_multcost = COSTS_N_INSNS (4);
1074	if (TARGET_NOMPY_SET)
1075	  arc_multcost = COSTS_N_INSNS (30);
1076	break;
1077      case ARC_TUNE_ARC700_4_2_XMAC:
1078	/* latency 5;
1079	   max throughput (1 multiply + 2 other insns) / 3 cycles.  */
1080	arc_multcost = COSTS_N_INSNS (3);
1081	if (TARGET_NOMPY_SET)
1082	  arc_multcost = COSTS_N_INSNS (30);
1083	break;
1084      case ARC_TUNE_ARC600:
1085	if (TARGET_MUL64_SET)
1086	  {
1087	    arc_multcost = COSTS_N_INSNS (4);
1088	    break;
1089	  }
1090	/* Fall through.  */
1091      default:
1092	arc_multcost = COSTS_N_INSNS (30);
1093	break;
1094      }
1095
1096  /* MPY instructions valid only for ARC700 or ARCv2.  */
1097  if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
1098      error ("%<-mno-mpy%> supported only for ARC700 or ARCv2");
1099
1100  if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
1101      error ("%<-mno-dpfp-lrsr%> supported only with %<-mdpfp%>");
1102
1103  /* FPX-1. No fast and compact together.  */
1104  if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
1105      || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
1106    error ("FPX fast and compact options cannot be specified together");
1107
1108  /* FPX-2. No fast-spfp for arc600 or arc601.  */
1109  if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
1110    error ("%<-mspfp_fast%> not available on ARC600 or ARC601");
1111
1112  /* FPX-4.  No FPX extensions mixed with FPU extensions.  */
1113  if ((TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET || TARGET_SPFP)
1114      && TARGET_HARD_FLOAT)
1115    error ("no FPX/FPU mixing allowed");
1116
1117  /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
1118  if (flag_pic && TARGET_ARC600_FAMILY)
1119    {
1120      warning (0, "PIC is not supported for %qs",
1121	       arc_cpu_string);
1122      flag_pic = 0;
1123    }
1124
1125  arc_init_reg_tables ();
1126
1127  /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
1128  memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
1129  arc_punct_chars['#'] = 1;
1130  arc_punct_chars['*'] = 1;
1131  arc_punct_chars['?'] = 1;
1132  arc_punct_chars['!'] = 1;
1133  arc_punct_chars['^'] = 1;
1134  arc_punct_chars['&'] = 1;
1135  arc_punct_chars['+'] = 1;
1136  arc_punct_chars['_'] = 1;
1137}
1138
/* Parse -mirq-ctrl-saved=RegisterRange, blink, lp_count.  The
   register range is specified as two registers separated by a dash.
   It always starts with r0, and its upper limit is the fp register.
   blink and lp_count registers are optional.  */
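
/* For example (an illustrative choice of registers):
     -mirq-ctrl-saved=r0-r11,blink
   asks for r0 through r11 plus BLINK to be saved automatically on
   interrupt entry; irq_range below parses such a string into
   irq_ctrl_saved.  */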
1143
1144static void
1145irq_range (const char *cstr)
1146{
1147  int i, first, last, blink, lpcount, xreg;
1148  char *str, *dash, *comma;
1149
1150  i = strlen (cstr);
1151  str = (char *) alloca (i + 1);
1152  memcpy (str, cstr, i + 1);
1153  blink = -1;
1154  lpcount = -1;
1155
1156  dash = strchr (str, '-');
1157  if (!dash)
1158    {
1159      warning (OPT_mirq_ctrl_saved_, "missing dash");
1160      return;
1161    }
1162  *dash = '\0';
1163
1164  comma = strchr (dash + 1, ',');
1165  if (comma)
1166    *comma = '\0';
1167
1168  first = decode_reg_name (str);
1169  if (first != 0)
1170    {
1171      warning (OPT_mirq_ctrl_saved_, "first register must be R0");
1172      return;
1173    }
1174
1175  /* At this moment we do not have the register names initialized
1176     accordingly.  */
1177  if (!strcmp (dash + 1, "ilink"))
1178    last = 29;
1179  else
1180    last = decode_reg_name (dash + 1);
1181
1182  if (last < 0)
1183    {
1184      warning (OPT_mirq_ctrl_saved_, "unknown register name: %s", dash + 1);
1185      return;
1186    }
1187
1188  if (!(last & 0x01))
1189    {
1190      warning (OPT_mirq_ctrl_saved_,
1191	       "last register name %s must be an odd register", dash + 1);
1192      return;
1193    }
1194
1195  *dash = '-';
1196
1197  if (first > last)
1198    {
1199      warning (OPT_mirq_ctrl_saved_,
1200	       "%s-%s is an empty range", str, dash + 1);
1201      return;
1202    }
1203
1204  while (comma)
1205    {
1206      *comma = ',';
1207      str = comma + 1;
1208
1209      comma = strchr (str, ',');
1210      if (comma)
1211	*comma = '\0';
1212
1213      xreg = decode_reg_name (str);
1214      switch (xreg)
1215	{
1216	case 31:
1217	  blink = 31;
1218	  break;
1219
1220	case 60:
1221	  lpcount = 60;
1222	  break;
1223
1224	default:
1225	  warning (OPT_mirq_ctrl_saved_,
1226		   "unknown register name: %s", str);
1227	  return;
1228	}
1229    }
1230
1231  irq_ctrl_saved.irq_save_last_reg = last;
1232  irq_ctrl_saved.irq_save_blink    = (blink == 31) || (last == 31);
1233  irq_ctrl_saved.irq_save_lpcount  = (lpcount == 60);
1234}
1235
/* Parse -mrgf-banked-regs=NUM option string.  Valid values for NUM are
   0, 4, 8, 16, or 32.  */
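
/* For instance (illustrative): -mrgf-banked-regs=16 declares that 16
   registers are banked for fast interrupts; the value is recorded in
   rgf_banked_register_count and consulted by the FIRQ-related macros
   above.  */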
1238
1239static void
1240parse_mrgf_banked_regs_option (const char *arg)
1241{
1242  long int val;
1243  char *end_ptr;
1244
1245  errno = 0;
1246  val = strtol (arg, &end_ptr, 10);
1247  if (errno != 0 || *arg == '\0' || *end_ptr != '\0'
1248      || (val != 0 && val != 4 && val != 8 && val != 16 && val != 32))
1249    {
1250      error ("invalid number in %<-mrgf-banked-regs=%s%> "
1251	     "valid values are 0, 4, 8, 16, or 32", arg);
1252      return;
1253    }
1254  rgf_banked_register_count = (int) val;
1255}
1256
1257/* Check ARC options, generate derived target attributes.  */
1258
1259static void
1260arc_override_options (void)
1261{
1262  unsigned int i;
1263  cl_deferred_option *opt;
1264  vec<cl_deferred_option> *vopt
1265    = (vec<cl_deferred_option> *) arc_deferred_options;
1266
1267  if (arc_cpu == PROCESSOR_NONE)
1268    arc_cpu = TARGET_CPU_DEFAULT;
1269
1270  /* Set the default cpu options.  */
1271  arc_selected_cpu = &arc_cpu_types[(int) arc_cpu];
1272
1273  /* Set the architectures.  */
1274  switch (arc_selected_cpu->arch_info->arch_id)
1275    {
1276    case BASE_ARCH_em:
1277      arc_cpu_string = "EM";
1278      break;
1279    case BASE_ARCH_hs:
1280      arc_cpu_string = "HS";
1281      break;
1282    case BASE_ARCH_700:
1283      if (arc_selected_cpu->processor == PROCESSOR_nps400)
1284	arc_cpu_string = "NPS400";
1285      else
1286	arc_cpu_string = "ARC700";
1287      break;
1288    case BASE_ARCH_6xx:
1289      arc_cpu_string = "ARC600";
1290      break;
1291    default:
1292      gcc_unreachable ();
1293    }
1294
1295  irq_ctrl_saved.irq_save_last_reg = -1;
1296  irq_ctrl_saved.irq_save_blink    = false;
1297  irq_ctrl_saved.irq_save_lpcount  = false;
1298
1299  rgf_banked_register_count = 0;
1300
1301  /* Handle the deferred options.  */
1302  if (vopt)
1303    FOR_EACH_VEC_ELT (*vopt, i, opt)
1304      {
1305	switch (opt->opt_index)
1306	  {
1307	  case OPT_mirq_ctrl_saved_:
1308	    if (TARGET_V2)
1309	      irq_range (opt->arg);
1310	    else
1311	      warning (OPT_mirq_ctrl_saved_,
1312		       "option %<-mirq-ctrl-saved%> valid only "
1313		       "for ARC v2 processors");
1314	    break;
1315
1316	  case OPT_mrgf_banked_regs_:
1317	    if (TARGET_V2)
1318	      parse_mrgf_banked_regs_option (opt->arg);
1319	    else
1320	      warning (OPT_mrgf_banked_regs_,
1321		       "option %<-mrgf-banked-regs%> valid only for "
1322		       "ARC v2 processors");
1323	    break;
1324
1325	  default:
1326	    gcc_unreachable();
1327	  }
1328      }
1329
1330  CLEAR_HARD_REG_SET (overrideregs);
1331  if (common_deferred_options)
1332    {
1333      vec<cl_deferred_option> v =
1334	*((vec<cl_deferred_option> *) common_deferred_options);
1335      int reg, nregs, j;
1336
1337      FOR_EACH_VEC_ELT (v, i, opt)
1338	{
1339	  switch (opt->opt_index)
1340	    {
1341	    case OPT_ffixed_:
1342	    case OPT_fcall_used_:
1343	    case OPT_fcall_saved_:
1344	      if ((reg = decode_reg_name_and_count (opt->arg, &nregs)) >= 0)
1345		for (j = reg;  j < reg + nregs; j++)
1346		  SET_HARD_REG_BIT (overrideregs, j);
1347	      break;
1348	    default:
1349	      break;
1350	    }
1351	}
1352    }
1353
  /* Check options against architecture options.  Throw an error if
     option is not allowed.  Additionally, check options against the
     default architecture/cpu flags and throw a warning if we find a
     mismatch.  */
1358  /* TRANSLATORS: the DOC/DOC0/DOC1 are strings which shouldn't be
1359     translated.  They are like keywords which one can relate with the
1360     architectural choices taken for an ARC CPU implementation.  */
1361#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC0, DOC1)		\
1362  do {								\
1363    if ((!(arc_selected_cpu->arch_info->flags & CODE))		\
1364	&& (VAR == VAL))					\
1365      error ("option %<%s=%s%> is not available for %qs CPU",	\
1366	     DOC0, DOC1, arc_selected_cpu->name);		\
1367    if ((arc_selected_cpu->arch_info->dflags & CODE)		\
1368	&& (VAR != DEFAULT_##VAR)				\
1369	&& (VAR != VAL))					\
1370      warning (0, "option %qs is ignored, the default value %qs"	\
1371	       " is considered for %qs CPU", DOC0, DOC1,		\
1372	       arc_selected_cpu->name);				\
1373 } while (0);
1374#define ARC_OPT(NAME, CODE, MASK, DOC)				\
1375  do {								\
1376    if ((!(arc_selected_cpu->arch_info->flags & CODE))		\
1377	&& (target_flags & MASK))				\
1378      error ("option %qs is not available for %qs CPU",		\
1379	     DOC, arc_selected_cpu->name);			\
1380    if ((arc_selected_cpu->arch_info->dflags & CODE)		\
1381	&& (target_flags_explicit & MASK)			\
1382	&& (!(target_flags & MASK)))				\
1383      warning (0, "unset option %qs is ignored, it is always"	\
1384	       " enabled for %qs CPU", DOC,			\
1385	       arc_selected_cpu->name);				\
1386  } while (0);
1387
1388#include "arc-options.def"
1389
1390#undef ARC_OPTX
1391#undef ARC_OPT
1392
1393  /* Set cpu flags accordingly to architecture/selected cpu.  The cpu
1394     specific flags are set in arc-common.cc.  The architecture forces
1395     the default hardware configurations in, regardless what command
1396     line options are saying.  The CPU optional hw options can be
1397     turned on or off.  */
1398#define ARC_OPT(NAME, CODE, MASK, DOC)			\
1399  do {							\
1400    if ((arc_selected_cpu->flags & CODE)		\
1401	&& ((target_flags_explicit & MASK) == 0))	\
1402      target_flags |= MASK;				\
1403    if (arc_selected_cpu->arch_info->dflags & CODE)	\
1404      target_flags |= MASK;				\
1405  } while (0);
1406#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC0, DOC1)	\
1407  do {							\
1408    if ((arc_selected_cpu->flags & CODE)		\
1409	&& (VAR == DEFAULT_##VAR))			\
1410      VAR = VAL;					\
1411    if (arc_selected_cpu->arch_info->dflags & CODE)	\
1412      VAR = VAL;					\
1413  } while (0);
1414
1415#include "arc-options.def"
1416
1417#undef ARC_OPTX
1418#undef ARC_OPT
1419
1420  /* Set extras.  */
1421  switch (arc_selected_cpu->extra)
1422    {
1423    case HAS_LPCOUNT_16:
1424      arc_lpcwidth = 16;
1425      break;
1426    default:
1427      break;
1428    }
1429
1430  /* Set Tune option.  */
1431  if (arc_tune == ARC_TUNE_NONE)
1432    arc_tune = (enum arc_tune_attr) arc_selected_cpu->tune;
1433
1434  if (arc_size_opt_level == 3)
1435    optimize_size = 1;
1436
1437  if (TARGET_V2 && optimize_size && (ATTRIBUTE_PCS == 2))
1438    TARGET_CODE_DENSITY_FRAME = 1;
1439
1440  if (flag_pic)
1441    target_flags |= MASK_NO_SDATA_SET;
1442
1443  /* Check for small data option */
1444  if (!OPTION_SET_P (g_switch_value) && !TARGET_NO_SDATA_SET)
1445    g_switch_value = TARGET_LL64 ? 8 : 4;
1446
1447  /* A7 has an issue with delay slots.  */
1448  if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
1449    flag_delayed_branch = 0;
1450
  /* Millicode thunks don't work for long calls.  */
  if (TARGET_LONG_CALLS_SET
      /* nor for RF16.  */
      || TARGET_RF16)
1455    target_flags &= ~MASK_MILLICODE_THUNK_SET;
1456
  /* Enable unaligned access for all HS cpus.  */
1458  if (!OPTION_SET_P (unaligned_access) && TARGET_HS)
1459    unaligned_access = 1;
1460
1461  /* These need to be done at start up.  It's convenient to do them here.  */
1462  arc_init ();
1463}
1464
1465/* The condition codes of the ARC, and the inverse function.  */
/* For short branches, the "c" / "nc" names are not defined in the ARC
   Programmers manual, so we have to use "lo" / "hs" instead.  */
1468static const char *arc_condition_codes[] =
1469{
1470  "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
1471  "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
1472};
1473
1474enum arc_cc_code_index
1475{
1476  ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
1477  ARC_CC_C,  ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
1478  ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
1479  ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
1480};
1481
1482#define ARC_INVERSE_CONDITION_CODE(X)  ((X) ^ 1)
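
/* For example, ARC_INVERSE_CONDITION_CODE (ARC_CC_EQ) == ARC_CC_NE and
   ARC_INVERSE_CONDITION_CODE (ARC_CC_GT) == ARC_CC_LE: inverse
   conditions occupy adjacent even/odd slots in the enum above, so
   flipping the low bit inverts the condition.  "al" and "pnz" have no
   inverse, which is why their partner slots in arc_condition_codes are
   0.  */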
1483
1484/* Returns the index of the ARC condition code string in
1485   `arc_condition_codes'.  COMPARISON should be an rtx like
1486   `(eq (...) (...))'.  */
1487
1488static int
1489get_arc_condition_code (rtx comparison)
1490{
1491  switch (GET_MODE (XEXP (comparison, 0)))
1492    {
1493    case E_CCmode:
1494    case E_SImode: /* For BRcc.  */
1495      switch (GET_CODE (comparison))
1496	{
1497	case EQ : return ARC_CC_EQ;
1498	case NE : return ARC_CC_NE;
1499	case GT : return ARC_CC_GT;
1500	case LE : return ARC_CC_LE;
1501	case GE : return ARC_CC_GE;
1502	case LT : return ARC_CC_LT;
1503	case GTU : return ARC_CC_HI;
1504	case LEU : return ARC_CC_LS;
1505	case LTU : return ARC_CC_LO;
1506	case GEU : return ARC_CC_HS;
1507	default : gcc_unreachable ();
1508	}
1509    case E_CC_ZNmode:
1510      switch (GET_CODE (comparison))
1511	{
1512	case EQ : return ARC_CC_EQ;
1513	case NE : return ARC_CC_NE;
1514	case GE: return ARC_CC_P;
1515	case LT: return ARC_CC_N;
1516	case GT : return ARC_CC_PNZ;
1517	default : gcc_unreachable ();
1518	}
1519    case E_CC_Zmode:
1520      switch (GET_CODE (comparison))
1521	{
1522	case EQ : return ARC_CC_EQ;
1523	case NE : return ARC_CC_NE;
1524	default : gcc_unreachable ();
1525	}
1526    case E_CC_Cmode:
1527      switch (GET_CODE (comparison))
1528	{
1529	case LTU : return ARC_CC_C;
1530	case GEU : return ARC_CC_NC;
1531	default : gcc_unreachable ();
1532	}
1533    case E_CC_FP_GTmode:
1534      if (TARGET_ARGONAUT_SET && TARGET_SPFP)
1535	switch (GET_CODE (comparison))
1536	  {
1537	  case GT  : return ARC_CC_N;
1538	  case UNLE: return ARC_CC_P;
1539	  default : gcc_unreachable ();
1540	}
1541      else
1542	switch (GET_CODE (comparison))
1543	  {
1544	  case GT   : return ARC_CC_HI;
1545	  case UNLE : return ARC_CC_LS;
1546	  default : gcc_unreachable ();
1547	}
1548    case E_CC_FP_GEmode:
1549      /* Same for FPX and non-FPX.  */
1550      switch (GET_CODE (comparison))
1551	{
1552	case GE   : return ARC_CC_HS;
1553	case UNLT : return ARC_CC_LO;
1554	default : gcc_unreachable ();
1555	}
1556    case E_CC_FP_UNEQmode:
1557      switch (GET_CODE (comparison))
1558	{
1559	case UNEQ : return ARC_CC_EQ;
1560	case LTGT : return ARC_CC_NE;
1561	default : gcc_unreachable ();
1562	}
1563    case E_CC_FP_ORDmode:
1564      switch (GET_CODE (comparison))
1565	{
1566	case UNORDERED : return ARC_CC_C;
1567	case ORDERED   : return ARC_CC_NC;
1568	default : gcc_unreachable ();
1569	}
1570    case E_CC_FPXmode:
1571      switch (GET_CODE (comparison))
1572	{
1573	case EQ        : return ARC_CC_EQ;
1574	case NE        : return ARC_CC_NE;
1575	case UNORDERED : return ARC_CC_C;
1576	case ORDERED   : return ARC_CC_NC;
1577	case LTGT      : return ARC_CC_HI;
1578	case UNEQ      : return ARC_CC_LS;
1579	default : gcc_unreachable ();
1580	}
1581    case E_CC_FPUmode:
1582    case E_CC_FPUEmode:
1583      switch (GET_CODE (comparison))
1584	{
1585	case EQ	       : return ARC_CC_EQ;
1586	case NE	       : return ARC_CC_NE;
1587	case GT	       : return ARC_CC_GT;
1588	case GE	       : return ARC_CC_GE;
1589	case LT	       : return ARC_CC_C;
1590	case LE	       : return ARC_CC_LS;
1591	case UNORDERED : return ARC_CC_V;
1592	case ORDERED   : return ARC_CC_NV;
1593	case UNGT      : return ARC_CC_HI;
1594	case UNGE      : return ARC_CC_HS;
1595	case UNLT      : return ARC_CC_LT;
1596	case UNLE      : return ARC_CC_LE;
1597	  /* UNEQ and LTGT do not have representation.  */
1598	case LTGT      : /* Fall through.  */
1599	case UNEQ      : /* Fall through.  */
1600	default : gcc_unreachable ();
1601	}
1602    case E_CC_FPU_UNEQmode:
1603      switch (GET_CODE (comparison))
1604	{
1605	case LTGT : return ARC_CC_NE;
1606	case UNEQ : return ARC_CC_EQ;
1607	default : gcc_unreachable ();
1608	}
1609    default : gcc_unreachable ();
1610    }
1611  /*NOTREACHED*/
1612  return (42);
1613}
1614
/* Return true if COMPARISON has a short form that can accommodate OFFSET.  */
1616
1617bool
1618arc_short_comparison_p (rtx comparison, int offset)
1619{
1620  gcc_assert (ARC_CC_NC == ARC_CC_HS);
1621  gcc_assert (ARC_CC_C == ARC_CC_LO);
1622  switch (get_arc_condition_code (comparison))
1623    {
1624    case ARC_CC_EQ: case ARC_CC_NE:
1625      return offset >= -512 && offset <= 506;
1626    case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
1627    case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
1628      return offset >= -64 && offset <= 58;
1629    default:
1630      return false;
1631    }
1632}
1633
1634/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1635   return the mode to be used for the comparison.  */
1636
1637machine_mode
1638arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
1639{
1640  machine_mode mode = GET_MODE (x);
1641  rtx x1;
1642
1643  /* For an operation that sets the condition codes as a side-effect, the
     C and V flags are not set as for cmp, so we can only use comparisons where
1645     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
1646     instead.)  */
1647  /* ??? We could use "pnz" for greater than zero, however, we could then
1648     get into trouble because the comparison could not be reversed.  */
1649  if (GET_MODE_CLASS (mode) == MODE_INT
1650      && y == const0_rtx
1651      && (op == EQ || op == NE
1652	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
1653    return CC_ZNmode;
1654
1655  /* add.f for if (a+b) */
1656  if (mode == SImode
1657      && GET_CODE (y) == NEG
1658      && (op == EQ || op == NE))
1659    return CC_ZNmode;
1660
1661  /* Check if this is a test suitable for bxor.f .  */
1662  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1663      && ((INTVAL (y) - 1) & INTVAL (y)) == 0
1664      && INTVAL (y))
1665    return CC_Zmode;
1666
1667  /* Check if this is a test suitable for add / bmsk.f .  */
1668  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1669      && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
1670      && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
1671      && (~INTVAL (x1) | INTVAL (y)) < 0
1672      && (~INTVAL (x1) | INTVAL (y)) > -0x800)
1673    return CC_Zmode;
1674
1675  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
1676      && GET_CODE (x) == PLUS
1677      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
1678    return CC_Cmode;
1679
1680  if (TARGET_ARGONAUT_SET
1681      && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
1682    switch (op)
1683      {
1684      case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1685	return CC_FPXmode;
1686      case LT: case UNGE: case GT: case UNLE:
1687	return CC_FP_GTmode;
1688      case LE: case UNGT: case GE: case UNLT:
1689	return CC_FP_GEmode;
1690      default: gcc_unreachable ();
1691      }
1692  else if (TARGET_HARD_FLOAT
1693	   && ((mode == SFmode && TARGET_FP_SP_BASE)
1694	       || (mode == DFmode && TARGET_FP_DP_BASE)))
1695    switch (op)
1696      {
1697      case EQ:
1698      case NE:
1699      case UNORDERED:
1700      case ORDERED:
1701      case UNLT:
1702      case UNLE:
1703      case UNGT:
1704      case UNGE:
1705	return CC_FPUmode;
1706
1707      case LT:
1708      case LE:
1709      case GT:
1710      case GE:
1711	return CC_FPUEmode;
1712
1713      case LTGT:
1714      case UNEQ:
1715	return CC_FPU_UNEQmode;
1716
1717      default:
1718	gcc_unreachable ();
1719      }
1720  else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
1721    {
1722      switch (op)
1723	{
1724	case EQ: case NE: return CC_Zmode;
1725	case LT: case UNGE:
1726	case GT: case UNLE: return CC_FP_GTmode;
1727	case LE: case UNGT:
1728	case GE: case UNLT: return CC_FP_GEmode;
1729	case UNEQ: case LTGT: return CC_FP_UNEQmode;
1730	case ORDERED: case UNORDERED: return CC_FP_ORDmode;
1731	default: gcc_unreachable ();
1732	}
1733    }
1734  return CCmode;
1735}
1736
1737/* Vectors to keep interesting information about registers where it can easily
1738	   be got.  We used to use the actual mode value as the bit number, but there
1739   is (or may be) more than 32 modes now.  Instead we use two tables: one
1740   indexed by hard register number, and one indexed by mode.  */
1741
1742/* The purpose of arc_mode_class is to shrink the range of modes so that
1743   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
1744   mapped into one arc_mode_class mode.  */
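
/* As an illustrative sketch of how the two tables interact (not specific to
   any particular build): SImode is a 4-byte MODE_INT, so arc_init_reg_tables
   below maps it to S_MODE; a hard register whose arc_hard_regno_modes entry
   includes S_MODES then satisfies the

     (arc_hard_regno_modes[REGNO] & arc_mode_class[SImode]) != 0

   check performed by arc_hard_regno_mode_ok, whereas a condition-code mode
   such as CC_ZNmode (mapped to C_MODE) only matches entries that include
   C_MODES.  */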
1745
1746enum arc_mode_class {
1747  C_MODE,
1748  S_MODE, D_MODE, T_MODE, O_MODE,
1749  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
1750  V_MODE
1751};
1752
1753/* Modes for condition codes.  */
1754#define C_MODES (1 << (int) C_MODE)
1755
1756/* Modes for single-word and smaller quantities.  */
1757#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
1758
1759/* Modes for double-word and smaller quantities.  */
1760#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
1761
1762/* Mode for 8-byte DF values only.  */
1763#define DF_MODES (1 << DF_MODE)
1764
1765/* Modes for quad-word and smaller quantities.  */
1766#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
1767
1768/* Modes for 128-bit vectors.  */
1769#define V_MODES (1 << (int) V_MODE)
1770
1771/* Value is 1 if register/mode pair is acceptable on arc.  */
1772
1773static unsigned int arc_hard_regno_modes[] = {
1774  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1775  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1776  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
1777  D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1778
1779  /* ??? Leave these as S_MODES for now.  */
1780  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1781  DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
1782  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1783  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
1784
1785  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1786  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1787  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1788  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1789
1790  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1791  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1792  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1793  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1794
1795  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1796  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1797  S_MODES, S_MODES
1798};
1799
1800static unsigned int arc_mode_class [NUM_MACHINE_MODES];
1801
1802enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
1803
1804enum reg_class
1805arc_preferred_reload_class (rtx, enum reg_class cl)
1806{
1807  return cl;
1808}
1809
1810/* Initialize the arc_mode_class array.  */
1811
1812static void
1813arc_init_reg_tables (void)
1814{
1815  int i;
1816
1817  for (i = 0; i < NUM_MACHINE_MODES; i++)
1818    {
1819      machine_mode m = (machine_mode) i;
1820
1821      switch (GET_MODE_CLASS (m))
1822	{
1823	case MODE_INT:
1824	case MODE_PARTIAL_INT:
1825	case MODE_COMPLEX_INT:
1826	  if (GET_MODE_SIZE (m) <= 4)
1827	    arc_mode_class[i] = 1 << (int) S_MODE;
1828	  else if (GET_MODE_SIZE (m) == 8)
1829	    arc_mode_class[i] = 1 << (int) D_MODE;
1830	  else if (GET_MODE_SIZE (m) == 16)
1831	    arc_mode_class[i] = 1 << (int) T_MODE;
1832	  else if (GET_MODE_SIZE (m) == 32)
1833	    arc_mode_class[i] = 1 << (int) O_MODE;
1834	  else
1835	    arc_mode_class[i] = 0;
1836	  break;
1837	case MODE_FLOAT:
1838	case MODE_COMPLEX_FLOAT:
1839	  if (GET_MODE_SIZE (m) <= 4)
1840	    arc_mode_class[i] = 1 << (int) SF_MODE;
1841	  else if (GET_MODE_SIZE (m) == 8)
1842	    arc_mode_class[i] = 1 << (int) DF_MODE;
1843	  else if (GET_MODE_SIZE (m) == 16)
1844	    arc_mode_class[i] = 1 << (int) TF_MODE;
1845	  else if (GET_MODE_SIZE (m) == 32)
1846	    arc_mode_class[i] = 1 << (int) OF_MODE;
1847	  else
1848	    arc_mode_class[i] = 0;
1849	  break;
1850	case MODE_VECTOR_INT:
1851	  if (GET_MODE_SIZE (m) == 4)
1852	    arc_mode_class[i] = (1 << (int) S_MODE);
1853	  else if (GET_MODE_SIZE (m) == 8)
1854	    arc_mode_class[i] = (1 << (int) D_MODE);
1855	  else
1856	    arc_mode_class[i] = (1 << (int) V_MODE);
1857	  break;
1858	case MODE_CC:
1859	default:
1860	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
1861	     we must explicitly check for them here.  */
1862	  if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
1863	      || i == (int) CC_Cmode
1864	      || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode
1865	      || i == CC_FPUmode || i == CC_FPUEmode || i == CC_FPU_UNEQmode)
1866	    arc_mode_class[i] = 1 << (int) C_MODE;
1867	  else
1868	    arc_mode_class[i] = 0;
1869	  break;
1870	}
1871    }
1872}
1873
1874/* Core registers 56..59 are used for multiply extension options.
1875   The dsp option uses r56 and r57, these are then named acc1 and acc2.
1876   acc1 is the highpart, and acc2 the lowpart, so which register gets which
1877   number depends on endianness.
1878   The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
1879   Because mlo / mhi form a 64 bit value, we use different gcc internal
1880   register numbers to make them form a register pair as the gcc internals
1881   know it.  mmid gets number 57, if still available, and mlo / mhi get
1882   numbers 58 and 59, depending on endianness.  We use DBX_REGISTER_NUMBER
1883   to map this back.  */
1884  char rname56[5] = "r56";
1885  char rname57[5] = "r57";
1886  char rname58[5] = "r58";
1887  char rname59[5] = "r59";
1888  char rname29[7] = "ilink1";
1889  char rname30[7] = "ilink2";
1890
1891static void
1892arc_conditional_register_usage (void)
1893{
1894  int regno;
1895  int i;
1896  int fix_start = 60, fix_end = 55;
1897
1898  if (TARGET_V2)
1899    {
1900      /* For ARCv2 the core register set is changed.  */
1901      strcpy (rname29, "ilink");
1902      strcpy (rname30, "r30");
1903
1904      if (!TEST_HARD_REG_BIT (overrideregs, R30_REG))
1905	{
1906	  /* No user interference.  Set the r30 to be used by the
1907	     compiler.  */
1908	  call_used_regs[R30_REG] = 1;
1909	  fixed_regs[R30_REG] = 0;
1910
1911	  arc_regno_reg_class[R30_REG] = GENERAL_REGS;
1912	}
1913   }
1914
1915  if (TARGET_MUL64_SET)
1916    {
1917      fix_start = R57_REG;
1918      fix_end = R59_REG;
1919
1920      /* We don't provide a name for mmid.  In rtl / assembly resource lists,
1921	 you are supposed to refer to it as mlo & mhi, e.g.
1922	 (zero_extract:SI (reg:DI 58) (const_int 32) (const_int 16)).
1923	 In an actual asm instruction, you would of course use mmid.
1924	 The point of avoiding having a separate register for mmid is that
1925	 this way, we don't have to carry clobbers of that reg around in every
1926	 instruction that modifies mlo and/or mhi.  */
1927      strcpy (rname57, "");
1928      strcpy (rname58, "mlo");
1929      strcpy (rname59, "mhi");
1930    }
1931
1932  /* The nature of arc_tp_regno is actually something more like a global
1933     register, however globalize_reg requires a declaration.
1934     We use EPILOGUE_USES to compensate so that sets from
1935     __builtin_set_frame_pointer are not deleted.  */
1936  if (arc_tp_regno != -1)
1937    fixed_regs[arc_tp_regno] = call_used_regs[arc_tp_regno] = 1;
1938
1939  if (TARGET_MULMAC_32BY16_SET)
1940    {
1941      fix_start = MUL32x16_REG;
1942      fix_end = fix_end > R57_REG ? fix_end : R57_REG;
1943      strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
1944      strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
1945    }
1946  for (regno = fix_start; regno <= fix_end; regno++)
1947    {
1948      if (!fixed_regs[regno])
1949	warning (0, "multiply option implies r%d is fixed", regno);
1950      fixed_regs [regno] = call_used_regs[regno] = 1;
1951    }
1952
1953  /* Reduced configuration: don't use r4-r9, r16-r25.  */
1954  if (TARGET_RF16)
1955    {
1956      for (i = R4_REG; i <= R9_REG; i++)
1957	fixed_regs[i] = call_used_regs[i] = 1;
1958      for (i = R16_REG; i <= R25_REG; i++)
1959	fixed_regs[i] = call_used_regs[i] = 1;
1960    }
1961
1962  /* ARCHS has 64-bit data-path which makes use of the even-odd paired
1963     registers.  */
1964  if (TARGET_HS)
1965    for (regno = R1_REG; regno < R32_REG; regno +=2)
1966      arc_hard_regno_modes[regno] = S_MODES;
1967
1968  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1969    if (i < ILINK1_REG)
1970      {
1971	if ((i <= R3_REG) || ((i >= R12_REG) && (i <= R15_REG)))
1972	  arc_regno_reg_class[i] = ARCOMPACT16_REGS;
1973	else
1974	  arc_regno_reg_class[i] = GENERAL_REGS;
1975      }
1976    else if (i < LP_COUNT)
1977      arc_regno_reg_class[i] = GENERAL_REGS;
1978    else
1979      arc_regno_reg_class[i] = NO_REGS;
1980
1981  /* Handle Special Registers.  */
1982  arc_regno_reg_class[CC_REG] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
1983  arc_regno_reg_class[FRAME_POINTER_REGNUM] = GENERAL_REGS;
1984  arc_regno_reg_class[ARG_POINTER_REGNUM] = GENERAL_REGS;
1985
1986  if (TARGET_DPFP)
1987    for (i = R40_REG; i < R44_REG; ++i)
1988      {
1989	arc_regno_reg_class[i] = DOUBLE_REGS;
1990	if (!TARGET_ARGONAUT_SET)
1991	  CLEAR_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i);
1992      }
1993  else
1994    {
1995      /* Disable all DOUBLE_REGISTER settings, if not generating DPFP
1996	 code.  */
1997      arc_regno_reg_class[R40_REG] = ALL_REGS;
1998      arc_regno_reg_class[R41_REG] = ALL_REGS;
1999      arc_regno_reg_class[R42_REG] = ALL_REGS;
2000      arc_regno_reg_class[R43_REG] = ALL_REGS;
2001
2002      fixed_regs[R40_REG] = 1;
2003      fixed_regs[R41_REG] = 1;
2004      fixed_regs[R42_REG] = 1;
2005      fixed_regs[R43_REG] = 1;
2006
2007      arc_hard_regno_modes[R40_REG] = 0;
2008      arc_hard_regno_modes[R42_REG] = 0;
2009    }
2010
2011  if (TARGET_SIMD_SET)
2012    {
2013      gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
2014      gcc_assert (ARC_LAST_SIMD_VR_REG  == 127);
2015
2016      for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
2017	arc_regno_reg_class [i] =  SIMD_VR_REGS;
2018
2019      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
2020      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
2021      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
2022      gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG  == 143);
2023
2024      for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
2025	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
2026	arc_regno_reg_class [i] =  SIMD_DMA_CONFIG_REGS;
2027    }
2028
2029  /* pc : r63 */
2030  arc_regno_reg_class[PCL_REG] = NO_REGS;
2031
2032  /* ARCv2 accumulator.  */
2033  if ((TARGET_V2
2034       && (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED))
2035      || TARGET_PLUS_DMPY)
2036  {
2037    arc_regno_reg_class[ACCL_REGNO] = GENERAL_REGS;
2038    arc_regno_reg_class[ACCH_REGNO] = GENERAL_REGS;
2039
2040    /* Allow the compiler to freely use them.  */
2041    if (!TEST_HARD_REG_BIT (overrideregs, ACCL_REGNO))
2042      fixed_regs[ACCL_REGNO] = 0;
2043    if (!TEST_HARD_REG_BIT (overrideregs, ACCH_REGNO))
2044      fixed_regs[ACCH_REGNO] = 0;
2045
2046    if (!fixed_regs[ACCH_REGNO] && !fixed_regs[ACCL_REGNO])
2047      arc_hard_regno_modes[ACC_REG_FIRST] = D_MODES;
2048  }
2049}
2050
2051/* Implement TARGET_HARD_REGNO_NREGS.  */
2052
2053static unsigned int
2054arc_hard_regno_nregs (unsigned int regno, machine_mode mode)
2055{
2056  if (GET_MODE_SIZE (mode) == 16
2057      && regno >= ARC_FIRST_SIMD_VR_REG
2058      && regno <= ARC_LAST_SIMD_VR_REG)
2059    return 1;
2060
2061  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
2062}
2063
2064/* Implement TARGET_HARD_REGNO_MODE_OK.  */
2065
2066static bool
2067arc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2068{
2069  return (arc_hard_regno_modes[regno] & arc_mode_class[mode]) != 0;
2070}
2071
2072/* Implement TARGET_MODES_TIEABLE_P.  Tie QI/HI/SI modes together.  */
2073
2074static bool
2075arc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2076{
2077  return (GET_MODE_CLASS (mode1) == MODE_INT
2078	  && GET_MODE_CLASS (mode2) == MODE_INT
2079	  && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD
2080	  && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD);
2081}
2082
2083/* Handle an "interrupt" attribute; arguments as in
2084   struct attribute_spec.handler.  */
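
/* For example (illustrative only; the accepted strings are exactly the ones
   checked below, the handler name is made up), an ARCv2 interrupt handler
   could be declared as

     void __attribute__ ((interrupt ("ilink"))) timer_isr (void);

   while pre-V2 cores accept "ilink1" or "ilink2" instead, and ARCv2 also
   accepts "firq".  */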
2085
2086static tree
2087arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
2088				bool *no_add_attrs)
2089{
2090  gcc_assert (args);
2091
2092  tree value = TREE_VALUE (args);
2093
2094  if (TREE_CODE (value) != STRING_CST)
2095    {
2096      warning (OPT_Wattributes,
2097	       "argument of %qE attribute is not a string constant",
2098	       name);
2099      *no_add_attrs = true;
2100    }
2101  else if (!TARGET_V2
2102	   && strcmp (TREE_STRING_POINTER (value), "ilink1")
2103	   && strcmp (TREE_STRING_POINTER (value), "ilink2"))
2104    {
2105      warning (OPT_Wattributes,
2106	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
2107	       name);
2108      *no_add_attrs = true;
2109    }
2110  else if (TARGET_V2
2111	   && strcmp (TREE_STRING_POINTER (value), "ilink")
2112	   && strcmp (TREE_STRING_POINTER (value), "firq"))
2113    {
2114      warning (OPT_Wattributes,
2115	       "argument of %qE attribute is not \"ilink\" or \"firq\"",
2116	       name);
2117      *no_add_attrs = true;
2118    }
2119
2120  return NULL_TREE;
2121}
2122
2123static tree
2124arc_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2125			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2126{
2127  if (TREE_CODE (*node) != FUNCTION_DECL)
2128    {
2129      warning (OPT_Wattributes, "%qE attribute only applies to functions",
2130	       name);
2131      *no_add_attrs = true;
2132    }
2133
2134  return NULL_TREE;
2135}
2136
2137/* Return the type of function FUN.
2138
2139   The result is cached in FUN->machine->fn_type and reused on
2140   subsequent calls.  */
2141
2142static unsigned int
2143arc_compute_function_type (struct function *fun)
2144{
2145  tree attr, decl = fun->decl;
2146  unsigned int fn_type = fun->machine->fn_type;
2147
2148  if (fn_type != ARC_FUNCTION_UNKNOWN)
2149    return fn_type;
2150
2151  /* Check if it is a naked function.  */
2152  if (lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) != NULL_TREE)
2153    fn_type |= ARC_FUNCTION_NAKED;
2154  else
2155    fn_type |= ARC_FUNCTION_NORMAL;
2156
2157  /* Now see if this is an interrupt handler.  */
2158  attr = lookup_attribute ("interrupt", DECL_ATTRIBUTES (decl));
2159  if (attr != NULL_TREE)
2160    {
2161      tree value, args = TREE_VALUE (attr);
2162
2163      gcc_assert (list_length (args) == 1);
2164      value = TREE_VALUE (args);
2165      gcc_assert (TREE_CODE (value) == STRING_CST);
2166
2167      if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
2168	  || !strcmp (TREE_STRING_POINTER (value), "ilink"))
2169	fn_type |= ARC_FUNCTION_ILINK1;
2170      else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
2171	fn_type |= ARC_FUNCTION_ILINK2;
2172      else if (!strcmp (TREE_STRING_POINTER (value), "firq"))
2173	fn_type |= ARC_FUNCTION_FIRQ;
2174      else
2175	gcc_unreachable ();
2176    }
2177
2178  return fun->machine->fn_type = fn_type;
2179}
2180
2181/* Implement `TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS' */
2182
2183static bool
2184arc_allocate_stack_slots_for_args (void)
2185{
2186  /* Naked functions should not allocate stack slots for arguments.  */
2187  unsigned int fn_type = arc_compute_function_type (cfun);
2188
2189  return !ARC_NAKED_P(fn_type);
2190}
2191
2192/* Implement `TARGET_WARN_FUNC_RETURN'.  */
2193
2194static bool
2195arc_warn_func_return (tree decl)
2196{
2197  struct function *func = DECL_STRUCT_FUNCTION (decl);
2198  unsigned int fn_type = arc_compute_function_type (func);
2199
2200  return !ARC_NAKED_P (fn_type);
2201}
2202
2203/* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
2204   and two if they are nearly compatible (which causes a warning to be
2205   generated).  */
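
/* As an illustrative reading of the checks below: a function type carrying
   "long_call" is incompatible with one carrying "short_call" (or with one
   carrying no call attribute at all), and a type that mixes two of
   "long_call", "medium_call" and "short_call" is likewise rejected.  */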
2206
2207static int
2208arc_comp_type_attributes (const_tree type1,
2209			  const_tree type2)
2210{
2211  int l1, l2, m1, m2, s1, s2;
2212
2213  /* Check for mismatch of non-default calling convention.  */
2214  if (TREE_CODE (type1) != FUNCTION_TYPE)
2215    return 1;
2216
2217  /* Check for mismatched call attributes.  */
2218  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2219  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2220  m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
2221  m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
2222  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2223  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2224
2225  /* Only bother to check if an attribute is defined.  */
2226  if (l1 | l2 | m1 | m2 | s1 | s2)
2227    {
2228      /* If one type has an attribute, the other must have the same attribute.  */
2229      if ((l1 != l2) || (m1 != m2) || (s1 != s2))
2230	return 0;
2231
2232      /* Disallow mixed attributes.  */
2233      if (l1 + m1 + s1 > 1)
2234	return 0;
2235    }
2236
2237
2238  return 1;
2239}
2240
2241/* Misc. utilities.  */
2242
2243/* X and Y are two things to compare using CODE.  Emit the compare insn and
2244   return the rtx for the cc reg in the proper mode.  */
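
/* Illustrative example: for a comparison such as (gt:SI (reg r0) (reg r1))
   with OMODE == SImode, this emits the RTL that sets the condition-code
   register and hands back something like

     (gt:SI (reg:CC cc) (const_int 0))

   ready to be used as the condition of a branch or conditional move.  */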
2245
2246rtx
2247gen_compare_reg (rtx comparison, machine_mode omode)
2248{
2249  enum rtx_code code = GET_CODE (comparison);
2250  rtx x = XEXP (comparison, 0);
2251  rtx y = XEXP (comparison, 1);
2252  rtx tmp, cc_reg;
2253  machine_mode mode, cmode;
2254
2255
2256  cmode = GET_MODE (x);
2257  if (cmode == VOIDmode)
2258    cmode = GET_MODE (y);
2259
2260  /* If ifcvt passed us a MODE_CC comparison we can
2261     just return it.  It should be in the proper form already.   */
2262  if (GET_MODE_CLASS (cmode) == MODE_CC)
2263    return comparison;
2264
2265  if (cmode != SImode && cmode != SFmode && cmode != DFmode)
2266    return NULL_RTX;
2267  if (cmode == SImode)
2268    {
2269      if (!register_operand (x, SImode))
2270	{
2271	  if (register_operand (y, SImode))
2272	    {
2273	      tmp = x;
2274	      x = y;
2275	      y = tmp;
2276	      code = swap_condition (code);
2277	    }
2278	  else
2279	    x = copy_to_mode_reg (SImode, x);
2280	}
2281      if (GET_CODE (y) == SYMBOL_REF && flag_pic)
2282	y = copy_to_mode_reg (SImode, y);
2283    }
2284  else
2285    {
2286      x = force_reg (cmode, x);
2287      y = force_reg (cmode, y);
2288    }
2289  mode = SELECT_CC_MODE (code, x, y);
2290
2291  cc_reg = gen_rtx_REG (mode, CC_REG);
2292
2293  /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
2294     cmpdfpx_raw, is not a correct comparison for floats:
2295        http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
2296   */
2297  if (TARGET_ARGONAUT_SET
2298      && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
2299    {
2300      switch (code)
2301	{
2302	case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
2303	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
2304	  break;
2305	case GT: case UNLE: case GE: case UNLT:
2306	  code = swap_condition (code);
2307	  tmp = x;
2308	  x = y;
2309	  y = tmp;
2310	  break;
2311	default:
2312	  gcc_unreachable ();
2313	}
2314      if (cmode == SFmode)
2315      {
2316	emit_insn (gen_cmpsfpx_raw (x, y));
2317      }
2318      else /* DFmode */
2319      {
2320	/* Accepts Dx regs directly by insns.  */
2321	emit_insn (gen_cmpdfpx_raw (x, y));
2322      }
2323
2324      if (mode != CC_FPXmode)
2325	emit_insn (gen_rtx_SET (cc_reg,
2326				gen_rtx_COMPARE (mode,
2327						 gen_rtx_REG (CC_FPXmode, 61),
2328						 const0_rtx)));
2329    }
2330  else if (TARGET_FPX_QUARK && (cmode == SFmode))
2331    {
2332      switch (code)
2333	{
2334	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
2335	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
2336	  break;
2337	case LT: case UNGE: case LE: case UNGT:
2338	  code = swap_condition (code);
2339	  tmp = x;
2340	  x = y;
2341	  y = tmp;
2342	  break;
2343	default:
2344	  gcc_unreachable ();
2345	}
2346
2347      emit_insn (gen_cmp_quark (cc_reg,
2348				gen_rtx_COMPARE (mode, x, y)));
2349    }
2350  else if (TARGET_HARD_FLOAT
2351	   && ((cmode == SFmode && TARGET_FP_SP_BASE)
2352	       || (cmode == DFmode && TARGET_FP_DP_BASE)))
2353    emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2354  else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
2355    {
2356      rtx op0 = gen_rtx_REG (cmode, 0);
2357      rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
2358      bool swap = false;
2359
2360      switch (code)
2361	{
2362	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
2363	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
2364	  break;
2365	case LT: case UNGE: case LE: case UNGT:
2366	  code = swap_condition (code);
2367	  swap = true;
2368	  break;
2369	default:
2370	  gcc_unreachable ();
2371	}
2372      if (currently_expanding_to_rtl)
2373	{
2374	  if (swap)
2375	    {
2376	      tmp = x;
2377	      x = y;
2378	      y = tmp;
2379	    }
2380	  emit_move_insn (op0, x);
2381	  emit_move_insn (op1, y);
2382	}
2383      else
2384	{
2385	  gcc_assert (rtx_equal_p (op0, x));
2386	  gcc_assert (rtx_equal_p (op1, y));
2387	  if (swap)
2388	    {
2389	      op0 = y;
2390	      op1 = x;
2391	    }
2392	}
2393      emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
2394    }
2395  else
2396    emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2397  return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
2398}
2399
2400/* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
2401   We assume the value can be either signed or unsigned.  */
2402
2403bool
2404arc_double_limm_p (rtx value)
2405{
2406  HOST_WIDE_INT low, high;
2407
2408  gcc_assert (GET_CODE (value) == CONST_DOUBLE);
2409
2410  if (TARGET_DPFP)
2411    return true;
2412
2413  low = CONST_DOUBLE_LOW (value);
2414  high = CONST_DOUBLE_HIGH (value);
2415
2416  if (low & 0x80000000)
2417    {
2418      return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
2419	      || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
2420		   == - (unsigned HOST_WIDE_INT) 0x80000000)
2421		  && high == -1));
2422    }
2423  else
2424    {
2425      return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
2426    }
2427}
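
/* Illustrative examples of the test above: a CONST_DOUBLE holding 5
   (high word zero) or -1 (high word -1, sign-extending the low word) fits
   in a limm, whereas one holding 0x100000000 (non-zero high word with a
   non-negative low word) does not.  */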
2428
2429/* Do any needed setup for a variadic function.  For the ARC, we must
2430   create a register parameter block, and then copy any anonymous arguments
2431   in registers to memory.
2432
2433   CUM has not been updated for the last named argument (which is given
2434   by ARG), and we rely on this fact.  */
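
/* A sketch of the effect (illustrative; it assumes the parameter registers
   are r0-r7): for "int f (int a, ...)" the named argument A lands in r0,
   so the first anonymous argument starts at r1 and the code below dumps
   r1..r7 into the register parameter block, setting *PRETEND_SIZE to
   7 * UNITS_PER_WORD.  */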
2435
2436static void
2437arc_setup_incoming_varargs (cumulative_args_t args_so_far,
2438			    const function_arg_info &arg,
2439			    int *pretend_size, int no_rtl)
2440{
2441  int first_anon_arg;
2442  CUMULATIVE_ARGS next_cum;
2443
2444  /* We must treat `__builtin_va_alist' as an anonymous arg.  */
2445
2446  next_cum = *get_cumulative_args (args_so_far);
2447  arc_function_arg_advance (pack_cumulative_args (&next_cum), arg);
2448  first_anon_arg = next_cum;
2449
2450  if (FUNCTION_ARG_REGNO_P (first_anon_arg))
2451    {
2452      /* First anonymous (unnamed) argument is in a reg.  */
2453
2454      /* Note that first_reg_offset < MAX_ARC_PARM_REGS.  */
2455      int first_reg_offset = first_anon_arg;
2456
2457      if (!no_rtl)
2458	{
2459	  rtx regblock
2460	    = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
2461			   FIRST_PARM_OFFSET (0)));
2462	  move_block_from_reg (first_reg_offset, regblock,
2463			       MAX_ARC_PARM_REGS - first_reg_offset);
2464	}
2465
2466      *pretend_size
2467	= ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD);
2468    }
2469}
2470
2471/* Cost functions.  */
2472
2473/* Provide the costs of an addressing mode that contains ADDR.
2474   If ADDR is not a valid address, its cost is irrelevant.  */
2475
2476static int
2477arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
2478{
2479  switch (GET_CODE (addr))
2480    {
2481    case REG :
2482      return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
2483    case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
2484    case PRE_MODIFY: case POST_MODIFY:
2485      return !speed;
2486
2487    case LABEL_REF :
2488    case SYMBOL_REF :
2489    case CONST :
2490      if (TARGET_NPS_CMEM && cmem_address (addr, SImode))
2491	return 0;
2492      /* Most likely needs a LIMM.  */
2493      return COSTS_N_INSNS (1);
2494
2495    case PLUS :
2496      {
2497	rtx plus0 = XEXP (addr, 0);
2498	rtx plus1 = XEXP (addr, 1);
2499
2500	if (GET_CODE (plus0) != REG
2501	    && (GET_CODE (plus0) != MULT
2502		|| !CONST_INT_P (XEXP (plus0, 1))
2503		|| (INTVAL (XEXP (plus0, 1)) != 2
2504		    && INTVAL (XEXP (plus0, 1)) != 4)))
2505	  break;
2506
2507	switch (GET_CODE (plus1))
2508	  {
2509	  case CONST_INT :
2510	    return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
2511		    ? COSTS_N_INSNS (1)
2512		    : speed
2513		    ? 0
2514		    : (satisfies_constraint_Rcq (plus0)
2515		       && satisfies_constraint_O (plus1))
2516		    ? 0
2517		    : 1);
2518	  case REG:
2519	    return (speed < 1 ? 0
2520		    : (satisfies_constraint_Rcq (plus0)
2521		       && satisfies_constraint_Rcq (plus1))
2522		    ? 0 : 1);
2523	  case CONST :
2524	  case SYMBOL_REF :
2525	  case LABEL_REF :
2526	    return COSTS_N_INSNS (1);
2527	  default:
2528	    break;
2529	  }
2530	break;
2531      }
2532    default:
2533      break;
2534    }
2535
2536  return 4;
2537}
2538
2539/* Emit instruction X with the frame related bit set.  */
2540
2541static rtx
2542frame_insn (rtx x)
2543{
2544  x = emit_insn (x);
2545  RTX_FRAME_RELATED_P (x) = 1;
2546  return x;
2547}
2548
2549/* Emit a frame insn to move SRC to DST.  */
2550
2551static rtx
2552frame_move (rtx dst, rtx src)
2553{
2554  rtx tmp = gen_rtx_SET (dst, src);
2555  RTX_FRAME_RELATED_P (tmp) = 1;
2556  return frame_insn (tmp);
2557}
2558
2559/* Like frame_move, but add a REG_INC note for REG if ADDR contains an
2560   auto-increment address, or is NULL.  */
2561
2562static rtx
2563frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
2564{
2565  rtx insn = frame_move (dst, src);
2566
2567  if (!addr
2568      || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
2569      || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
2570    add_reg_note (insn, REG_INC, reg);
2571  return insn;
2572}
2573
2574/* Emit a frame insn which adjusts a frame address register REG by OFFSET.  */
2575
2576static rtx
2577frame_add (rtx reg, HOST_WIDE_INT offset)
2578{
2579  gcc_assert ((offset & 0x3) == 0);
2580  if (!offset)
2581    return NULL_RTX;
2582  return frame_move (reg, plus_constant (Pmode, reg, offset));
2583}
2584
2585/* Emit a frame insn which adjusts stack pointer by OFFSET.  */
2586
2587static rtx
2588frame_stack_add (HOST_WIDE_INT offset)
2589{
2590  return frame_add (stack_pointer_rtx, offset);
2591}
2592
2593/* Helper function to wrap FRAME_POINTER_NEEDED.  We do this as
2594   FRAME_POINTER_NEEDED will not be true until the IRA (Integrated
2595   Register Allocator) pass, while we want to get the frame size
2596   correct earlier than the IRA pass.
2597
2598   When a function uses eh_return we must ensure that the fp register
2599   is saved and then restored so that the unwinder can restore the
2600   correct value for the frame we are going to jump to.
2601
2602   To do this we force all frames that call eh_return to require a
2603   frame pointer (see arc_frame_pointer_required), this
2604   will ensure that the previous frame pointer is stored on entry to
2605   the function, and will then be reloaded at function exit.
2606
2607   As the frame pointer is handled as a special case in our prologue
2608   and epilogue code it must not be saved and restored using the
2609   MUST_SAVE_REGISTER mechanism otherwise we run into issues where GCC
2610   believes that the function is not using a frame pointer and that
2611   the value in the fp register is the frame pointer, while the
2612   prologue and epilogue are busy saving and restoring the fp
2613   register.
2614
2615   During compilation of a function the frame size is evaluated
2616   multiple times, it is not until the reload pass is complete the
2617   frame size is considered fixed (it is at this point that space for
2618   all spills has been allocated).  However the frame_pointer_needed
2619   variable is not set true until the register allocation pass, as a
2620   result in the early stages the frame size does not include space
2621   for the frame pointer to be spilled.
2622
2623   The problem that this causes is that the rtl generated for
2624   EH_RETURN_HANDLER_RTX uses the details of the frame size to compute
2625   the offset from the frame pointer at which the return address
2626   lives.  However, in early passes GCC has not yet realised we need a
2627   frame pointer, and so has not included space for the frame pointer
2628   in the frame size, and so gets the offset of the return address
2629   wrong.  This should not be an issue as in later passes GCC has
2630   realised that the frame pointer needs to be spilled, and has
2631   increased the frame size.  However, the rtl for the
2632   EH_RETURN_HANDLER_RTX is not regenerated to use the newer, larger
2633   offset, and the wrong smaller offset is used.  */
2634
2635static bool
2636arc_frame_pointer_needed (void)
2637{
2638  return (frame_pointer_needed || crtl->calls_eh_return);
2639}
2640
2641/* Tell prologue and epilogue if register REGNO should be saved /
2642   restored.  SPECIAL_P is true when the register may need a special
2643   ld/st sequence.  The return address and stack pointer are treated
2644   separately.  Don't consider them here.  */
2645
2646static bool
2647arc_must_save_register (int regno, struct function *func, bool special_p)
2648{
2649  unsigned int fn_type = arc_compute_function_type (func);
2650  bool irq_auto_save_p = ((irq_ctrl_saved.irq_save_last_reg >= regno)
2651			  && ARC_AUTO_IRQ_P (fn_type));
2652  bool firq_auto_save_p = ARC_FAST_INTERRUPT_P (fn_type);
2653
2654  switch (rgf_banked_register_count)
2655    {
2656    case 4:
2657      firq_auto_save_p &= (regno < 4);
2658      break;
2659    case 8:
2660      firq_auto_save_p &= ((regno < 4) || ((regno > 11) && (regno < 16)));
2661      break;
2662    case 16:
2663      firq_auto_save_p &= ((regno < 4) || ((regno > 9) && (regno < 16))
2664			   || ((regno > 25) && (regno < 29))
2665			   || ((regno > 29) && (regno < 32)));
2666      break;
2667    case 32:
2668      firq_auto_save_p &= (regno != 29) && (regno < 32);
2669      break;
2670    default:
2671      firq_auto_save_p = false;
2672      break;
2673    }
2674
2675  switch (regno)
2676    {
2677    case ILINK1_REG:
2678    case RETURN_ADDR_REGNUM:
2679    case STACK_POINTER_REGNUM:
2680      /* The stack pointer and the return address are handled
2681	 separately.  */
2682      return false;
2683
2684    case R30_REG:
2685      /* r30 is either used as ilink2 by ARCv1 or as a free register
2686	 by ARCv2.  */
2687      if (!TARGET_V2)
2688	return false;
2689      break;
2690
2691    case R40_REG:
2692    case R41_REG:
2693    case R42_REG:
2694    case R43_REG:
2695    case R44_REG:
2696      /* If these registers are used by the FPX machinery, we handle them
2697	 separately.  */
2698      if (TARGET_DPFP && !special_p)
2699	return false;
2700      /* FALLTHRU.  */
2701
2702    case R32_REG:
2703    case R33_REG:
2704    case R34_REG:
2705    case R35_REG:
2706    case R36_REG:
2707    case R37_REG:
2708    case R38_REG:
2709    case R39_REG:
2710    case R45_REG:
2711    case R46_REG:
2712    case R47_REG:
2713    case R48_REG:
2714    case R49_REG:
2715    case R50_REG:
2716    case R51_REG:
2717    case R52_REG:
2718    case R53_REG:
2719    case R54_REG:
2720    case R55_REG:
2721    case R56_REG:
2722    case R57_REG:
2723      /* The Extension Registers.  */
2724      if (ARC_INTERRUPT_P (fn_type)
2725	  && (df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2726	      || df_regs_ever_live_p (regno))
2727	  /* Not all extension registers are available, choose the
2728	     real ones.  */
2729	  && !fixed_regs[regno])
2730	return true;
2731      return false;
2732
2733    case R58_REG:
2734    case R59_REG:
2735      /* ARC600 specifies these as the mlo/mhi registers, otherwise
2736	 just handle them like any other extension register.  */
2737      if (ARC_INTERRUPT_P (fn_type)
2738	  && (df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2739	      || df_regs_ever_live_p (regno))
2740	  /* Not all extension registers are available, choose the
2741	     real ones.  */
2742	  && ((!fixed_regs[regno] && !special_p)
2743	      || (TARGET_MUL64_SET && special_p)))
2744	return true;
2745      return false;
2746
2747    case 61:
2748    case 62:
2749    case 63:
2750      /* Fixed/control register, nothing to do.  LP_COUNT is
2751	 different.  */
2752      return false;
2753
2754    case HARD_FRAME_POINTER_REGNUM:
2755      /* If we need FP reg as a frame pointer then don't save it as a
2756	 regular reg.  */
2757      if (arc_frame_pointer_needed ())
2758	return false;
2759      break;
2760
2761    default:
2762      break;
2763    }
2764
2765  if (((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2766       /* In an interrupt save everything.  */
2767       || (ARC_INTERRUPT_P (fn_type)
2768	   && (df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2769	       || df_regs_ever_live_p (regno))))
2770      /* Do not emit code for auto saved regs.  */
2771      && !irq_auto_save_p
2772      && !firq_auto_save_p)
2773    return true;
2774  return false;
2775}
2776
2777/* Return true if the return address must be saved in the current function,
2778   otherwise return false.  */
2779
2780static bool
2781arc_must_save_return_addr (struct function *func)
2782{
2783  if (func->machine->frame_info.save_return_addr)
2784    return true;
2785
2786  return false;
2787}
2788
2789/* Return non-zero if there are registers to be saved or loaded using
2790   millicode thunks.  We can only use consecutive sequences starting
2791   with r13, and not going beyond r25.
2792   GMASK is a bitmask of registers to save.  This function sets
2793   FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2794   of registers to be saved / restored with a millicode call.  */
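
/* For example (illustrative): a GMASK covering r13..r16 yields
   millicode_start_reg == 13 and millicode_end_reg == 16, while a mask
   covering only r13..r14 in a leaf function is left to ordinary st/ld
   sequences because it falls below the three-register threshold.  */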
2795
2796static int
2797arc_compute_millicode_save_restore_regs (uint64_t gmask,
2798					 struct arc_frame_info *frame)
2799{
2800  int regno;
2801
2802  int start_reg = 13, end_reg = 25;
2803
2804  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
2805    regno++;
2806  end_reg = regno - 1;
2807  /* There is no point in using millicode thunks if we don't save/restore
2808     at least three registers.  For non-leaf functions we also have the
2809     blink restore.  */
2810  if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2811    {
2812      frame->millicode_start_reg = 13;
2813      frame->millicode_end_reg = regno - 1;
2814      return 1;
2815    }
2816  return 0;
2817}
2818
2819/* Return the bytes needed to compute the frame pointer from the
2820   current stack pointer.  */
2821
2822static unsigned int
2823arc_compute_frame_size (void)
2824{
2825  int regno;
2826  unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2827  unsigned int reg_size;
2828  uint64_t gmask;
2829  struct arc_frame_info *frame_info;
2830  int size;
2831  unsigned int extra_plus_reg_size;
2832  unsigned int extra_plus_reg_size_aligned;
2833  unsigned int fn_type = arc_compute_function_type (cfun);
2834
2835  /* The answer might already be known.  */
2836  if (cfun->machine->frame_info.initialized)
2837    return cfun->machine->frame_info.total_size;
2838
2839  frame_info = &cfun->machine->frame_info;
2840  size = ARC_STACK_ALIGN (get_frame_size ());
2841
2842  /* 1) Size of locals and temporaries.  */
2843  var_size	= size;
2844
2845  /* 2) Size of outgoing arguments.  */
2846  args_size	= crtl->outgoing_args_size;
2847
2848  /* 3) Calculate space needed for saved registers.
2849     ??? We ignore the extension registers for now.  */
2850
2851  /* See if this is an interrupt handler.  Call used registers must be saved
2852     for them too.  */
2853
2854  reg_size = 0;
2855  gmask = 0;
2856
2857  /* The last 4 regs are special, avoid them.  */
2858  for (regno = 0; regno <= (GMASK_LEN - 4); regno++)
2859    {
2860      if (arc_must_save_register (regno, cfun, false))
2861	{
2862	  reg_size += UNITS_PER_WORD;
2863	  gmask |= 1ULL << regno;
2864	}
2865    }
2866
2867  /* In a frame that calls __builtin_eh_return two data registers are
2868     used to pass values back to the exception handler.
2869
2870     Ensure that these registers are spilled to the stack so that the
2871     exception throw code can find them, and update the saved values.
2872     The handling code will then consume these reloaded values to
2873     handle the exception.  */
2874  if (crtl->calls_eh_return)
2875    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2876      {
2877	reg_size += UNITS_PER_WORD;
2878	gmask |= 1ULL << regno;
2879      }
2880
2881  /* Check if we need to save the return address.  */
2882  frame_info->save_return_addr = (!crtl->is_leaf
2883				  || df_regs_ever_live_p (RETURN_ADDR_REGNUM)
2884				  || crtl->calls_eh_return);
2885
2886  /* Saving blink reg for millicode thunk calls.  */
2887  if (TARGET_MILLICODE_THUNK_SET
2888      && !ARC_INTERRUPT_P (fn_type)
2889      && !crtl->calls_eh_return)
2890    {
2891      if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2892	frame_info->save_return_addr = true;
2893    }
2894
2895  /* Save lp_count, lp_start and lp_end.  */
2896  if (arc_lpcwidth != 0 && arc_must_save_register (LP_COUNT, cfun, true))
2897    reg_size += UNITS_PER_WORD * 3;
2898
2899  /* Check for the special R40-R44 regs used by FPX extension.  */
2900  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R41_REG : R40_REG,
2901			      cfun, TARGET_DPFP))
2902    reg_size += UNITS_PER_WORD * 2;
2903  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R43_REG : R42_REG,
2904			      cfun, TARGET_DPFP))
2905    reg_size += UNITS_PER_WORD * 2;
2906
2907  /* Check if R58 is used.  */
2908  if (arc_must_save_register (R58_REG, cfun, true))
2909    reg_size += UNITS_PER_WORD * 2;
2910
2911  /* 4) Calculate extra size made up of the blink + fp size.  */
2912  extra_size = 0;
2913  if (arc_must_save_return_addr (cfun))
2914    extra_size = 4;
2915  /* Add FP size only when it is not autosaved.  */
2916  if (arc_frame_pointer_needed ()
2917      && !ARC_AUTOFP_IRQ_P (fn_type))
2918    extra_size += 4;
2919
2920  /* 5) Space for variable arguments passed in registers.  */
2921  pretend_size	= crtl->args.pretend_args_size;
2922
2923  /* Ensure everything before the locals is aligned appropriately.  */
2924  extra_plus_reg_size = extra_size + reg_size;
2925  extra_plus_reg_size_aligned = ARC_STACK_ALIGN (extra_plus_reg_size);
2926  reg_size = extra_plus_reg_size_aligned - extra_size;
2927
2928  /* Compute total frame size.  */
2929  total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2930
2931  /* It used to be the case that the alignment was forced at this
2932     point.  However, that is dangerous, calculations based on
2933     total_size would be wrong.  Given that this has never cropped up
2934     as an issue I've changed this to an assert for now.  */
2935  gcc_assert (total_size == ARC_STACK_ALIGN (total_size));
2936
2937  /* Save computed information.  */
2938  frame_info->total_size   = total_size;
2939  frame_info->extra_size   = extra_size;
2940  frame_info->pretend_size = pretend_size;
2941  frame_info->var_size     = var_size;
2942  frame_info->args_size    = args_size;
2943  frame_info->reg_size     = reg_size;
2944  frame_info->gmask        = gmask;
2945  frame_info->initialized  = reload_completed;
2946
2947  /* Ok, we're done.  */
2948  return total_size;
2949}
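
/* A worked example of the bookkeeping above (illustrative figures, assuming
   4-byte words and 4-byte stack alignment): a non-leaf function with 16
   bytes of locals, no outgoing or pretend arguments, no frame pointer, and
   r13/r14 live across calls gets var_size == 16, reg_size == 8,
   extra_size == 4 for the blink save, and therefore
   total_size == 16 + 0 + 4 + 0 + 8 == 28.  */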
2950
2951/* Build dwarf information when the context is saved via AUX_IRQ_CTRL
2952   mechanism.  */
2953
2954static void
2955arc_dwarf_emit_irq_save_regs (void)
2956{
2957  rtx tmp, par, insn, reg;
2958  int i, offset, j;
2959
2960  par = gen_rtx_SEQUENCE (VOIDmode,
2961			  rtvec_alloc (irq_ctrl_saved.irq_save_last_reg + 1
2962				       + irq_ctrl_saved.irq_save_blink
2963				       + irq_ctrl_saved.irq_save_lpcount
2964				       + 1));
2965
2966  /* Build the stack adjustment note for unwind info.  */
2967  j = 0;
2968  offset = UNITS_PER_WORD * (irq_ctrl_saved.irq_save_last_reg + 1
2969			     + irq_ctrl_saved.irq_save_blink
2970			     + irq_ctrl_saved.irq_save_lpcount);
2971  tmp = plus_constant (Pmode, stack_pointer_rtx, -1 * offset);
2972  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
2973  RTX_FRAME_RELATED_P (tmp) = 1;
2974  XVECEXP (par, 0, j++) = tmp;
2975
2976  offset -= UNITS_PER_WORD;
2977
2978  /* 1st goes LP_COUNT.  */
2979  if (irq_ctrl_saved.irq_save_lpcount)
2980    {
2981      reg = gen_rtx_REG (SImode, 60);
2982      tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
2983      tmp = gen_frame_mem (SImode, tmp);
2984      tmp = gen_rtx_SET (tmp, reg);
2985      RTX_FRAME_RELATED_P (tmp) = 1;
2986      XVECEXP (par, 0, j++) = tmp;
2987      offset -= UNITS_PER_WORD;
2988    }
2989
2990  /* 2nd goes BLINK.  */
2991  if (irq_ctrl_saved.irq_save_blink)
2992    {
2993      reg = gen_rtx_REG (SImode, 31);
2994      tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
2995      tmp = gen_frame_mem (SImode, tmp);
2996      tmp = gen_rtx_SET (tmp, reg);
2997      RTX_FRAME_RELATED_P (tmp) = 1;
2998      XVECEXP (par, 0, j++) = tmp;
2999      offset -= UNITS_PER_WORD;
3000    }
3001
3002  /* Build the parallel of the remaining registers recorded as saved
3003     for unwind.  */
3004  for (i = irq_ctrl_saved.irq_save_last_reg; i >= 0; i--)
3005    {
3006      reg = gen_rtx_REG (SImode, i);
3007      tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
3008      tmp = gen_frame_mem (SImode, tmp);
3009      tmp = gen_rtx_SET (tmp, reg);
3010      RTX_FRAME_RELATED_P (tmp) = 1;
3011      XVECEXP (par, 0, j++) = tmp;
3012      offset -= UNITS_PER_WORD;
3013    }
3014
3015  /* Dummy insn used to anchor the dwarf info.  */
3016  insn = emit_insn (gen_stack_irq_dwarf());
3017  add_reg_note (insn, REG_FRAME_RELATED_EXPR, par);
3018  RTX_FRAME_RELATED_P (insn) = 1;
3019}
3020
3021/* Helper for prologue: emit frame store with pre_modify or pre_dec to
3022   save register REG on stack.  An initial offset OFFSET can be passed
3023   to the function.  */
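
/* A sketch of the RTL produced (illustrative): with OFFSET == 0 this emits
   the equivalent of

     (set (mem (pre_dec (reg sp))) (reg REG))

   i.e. a push of REG, and returns how many bytes the stack grew by; a
   non-zero OFFSET folds the extra stack adjustment into a pre_modify
   address instead.  */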
3024
3025static int
3026frame_save_reg (rtx reg, HOST_WIDE_INT offset)
3027{
3028  rtx addr;
3029
3030  if (offset)
3031    {
3032      rtx tmp = plus_constant (Pmode, stack_pointer_rtx,
3033			       offset - GET_MODE_SIZE (GET_MODE (reg)));
3034      addr = gen_frame_mem (GET_MODE (reg),
3035			    gen_rtx_PRE_MODIFY (Pmode,
3036						stack_pointer_rtx,
3037						tmp));
3038    }
3039  else
3040    addr = gen_frame_mem (GET_MODE (reg), gen_rtx_PRE_DEC (Pmode,
3041							   stack_pointer_rtx));
3042  frame_move_inc (addr, reg, stack_pointer_rtx, 0);
3043
3044  return GET_MODE_SIZE (GET_MODE (reg)) - offset;
3045}
3046
3047/* Helper used when saving AUX regs during ISR.  */
3048
3049static int
3050push_reg (rtx reg)
3051{
3052  rtx stkslot = gen_rtx_MEM (GET_MODE (reg), gen_rtx_PRE_DEC (Pmode,
3053						   stack_pointer_rtx));
3054  rtx insn = emit_move_insn (stkslot, reg);
3055  RTX_FRAME_RELATED_P (insn) = 1;
3056  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3057		gen_rtx_SET (stack_pointer_rtx,
3058			     plus_constant (Pmode, stack_pointer_rtx,
3059					    -GET_MODE_SIZE (GET_MODE (reg)))));
3060  return GET_MODE_SIZE (GET_MODE (reg));
3061}
3062
3063/* Helper for epilogue: emit frame load with post_modify or post_inc
3064   to restore register REG from stack.  The initial offset is passed
3065   via OFFSET.  */
3066
3067static int
3068frame_restore_reg (rtx reg, HOST_WIDE_INT offset)
3069{
3070  rtx addr, insn;
3071
3072  if (offset)
3073    {
3074      rtx tmp = plus_constant (Pmode, stack_pointer_rtx,
3075			       offset + GET_MODE_SIZE (GET_MODE (reg)));
3076      addr = gen_frame_mem (GET_MODE (reg),
3077			    gen_rtx_POST_MODIFY (Pmode,
3078						 stack_pointer_rtx,
3079						 tmp));
3080    }
3081  else
3082    addr = gen_frame_mem (GET_MODE (reg), gen_rtx_POST_INC (Pmode,
3083							    stack_pointer_rtx));
3084  insn = frame_move_inc (reg, addr, stack_pointer_rtx, 0);
3085  add_reg_note (insn, REG_CFA_RESTORE, reg);
3086
3087  if (reg == hard_frame_pointer_rtx)
3088    add_reg_note (insn, REG_CFA_DEF_CFA,
3089		  plus_constant (Pmode, stack_pointer_rtx,
3090				 GET_MODE_SIZE (GET_MODE (reg)) + offset));
3091  else
3092    add_reg_note (insn, REG_CFA_ADJUST_CFA,
3093		  gen_rtx_SET (stack_pointer_rtx,
3094			       plus_constant (Pmode, stack_pointer_rtx,
3095					      GET_MODE_SIZE (GET_MODE (reg))
3096					      + offset)));
3097
3098  return GET_MODE_SIZE (GET_MODE (reg)) + offset;
3099}
3100
3101/* Helper used when restoring AUX regs during ISR.  */
3102
3103static int
3104pop_reg (rtx reg)
3105{
3106  rtx stkslot = gen_rtx_MEM (GET_MODE (reg), gen_rtx_POST_INC (Pmode,
3107						   stack_pointer_rtx));
3108  rtx insn = emit_move_insn (reg, stkslot);
3109  RTX_FRAME_RELATED_P (insn) = 1;
3110  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3111		gen_rtx_SET (stack_pointer_rtx,
3112			     plus_constant (Pmode, stack_pointer_rtx,
3113					    GET_MODE_SIZE (GET_MODE (reg)))));
3114  return GET_MODE_SIZE (GET_MODE (reg));
3115}
3116
3117/* Check if we have a contiguous range to be saved/restored with the
3118   help of enter/leave instructions.  A valid register range starts
3119   from $r13 and goes up to (and including) $r26.  */
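
/* For example, a GMASK selecting r13, r14 and r15 forms a valid range,
   whereas one selecting r13 and r15 but skipping r14, or one that does not
   start at r13, is rejected below.  */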
3120
3121static bool
3122arc_enter_leave_p (uint64_t gmask)
3123{
3124  int regno;
3125  unsigned int rmask = 0;
3126
3127  if (!gmask)
3128    return false;
3129
3130  for (regno = ENTER_LEAVE_START_REG;
3131       regno <= ENTER_LEAVE_END_REG && (gmask & (1ULL << regno)); regno++)
3132    rmask |= 1ULL << regno;
3133
3134  if (rmask ^ gmask)
3135    return false;
3136
3137  return true;
3138}
3139
3140/* ARC's prologue, save any needed call-saved regs (and call-used if
3141   this is an interrupt handler) for ARCompact ISA, using ST/STD
3142   instructions.  */
3143
3144static int
3145arc_save_callee_saves (uint64_t gmask,
3146		       bool save_blink,
3147		       bool save_fp,
3148		       HOST_WIDE_INT offset,
3149		       bool emit_move)
3150{
3151  rtx reg;
3152  int frame_allocated = 0;
3153  int i;
3154
3155  /* The home-grown ABI says link register is saved first.  */
3156  if (save_blink)
3157    {
3158      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3159      frame_allocated += frame_save_reg (reg, offset);
3160      offset = 0;
3161    }
3162
3163  /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
3164  if (gmask)
3165    for (i = GMASK_LEN; i >= 0; i--)
3166      {
3167	machine_mode save_mode = SImode;
3168
3169	if (TARGET_LL64
3170	    && ((i - 1) % 2 == 0)
3171	    && ((gmask & (1ULL << i)) != 0)
3172	    && ((gmask & (1ULL << (i - 1))) != 0))
3173	  {
3174	    save_mode = DImode;
3175	    --i;
3176	  }
3177	else if ((gmask & (1ULL << i)) == 0)
3178	  continue;
3179
3180	reg = gen_rtx_REG (save_mode, i);
3181	frame_allocated += frame_save_reg (reg, offset);
3182	offset = 0;
3183      }
3184
3185  /* Save frame pointer if needed.  First save the FP on stack, if not
3186     autosaved.  Unfortunately, I cannot add it to gmask and use the
3187     above loop to save fp because our ABI states fp goes after all
3188     registers are saved.  */
3189  if (save_fp)
3190    {
3191      frame_allocated += frame_save_reg (hard_frame_pointer_rtx, offset);
3192      offset = 0;
3193    }
3194
3195  /* Emit mov fp,sp.  */
3196  if (emit_move)
3197    frame_move (hard_frame_pointer_rtx, stack_pointer_rtx);
3198
3199  return frame_allocated;
3200}
3201
3202/* ARC's epilogue, restore any required call-saved regs (and call-used
3203   if it is for an interrupt handler) using LD/LDD instructions.  */
3204
3205static int
3206arc_restore_callee_saves (uint64_t gmask,
3207			  bool restore_blink,
3208			  bool restore_fp,
3209			  HOST_WIDE_INT offset,
3210			  HOST_WIDE_INT allocated)
3211{
3212  rtx reg;
3213  int frame_deallocated = 0;
3214  HOST_WIDE_INT offs = cfun->machine->frame_info.reg_size;
3215  unsigned int fn_type = arc_compute_function_type (cfun);
3216  bool early_blink_restore;
3217  int i;
3218
3219  /* Emit mov fp,sp.  */
3220  if (arc_frame_pointer_needed () && offset)
3221    {
3222      frame_move (stack_pointer_rtx, hard_frame_pointer_rtx);
3223      frame_deallocated += offset;
3224      offset = 0;
3225    }
3226
3227  if (restore_fp)
3228    {
3229      /* Any offset is taken care by previous if-statement.  */
3230      gcc_assert (offset == 0);
3231      frame_deallocated += frame_restore_reg (hard_frame_pointer_rtx, 0);
3232    }
3233
3234  if (offset)
3235    {
3236      /* No $fp involved, we need to do an add to set the $sp to the
3237	 location of the first register.  */
3238      frame_stack_add (offset);
3239      frame_deallocated += offset;
3240      offset = 0;
3241    }
3242
3243  /* When we do not optimize for size or we aren't in an interrupt,
3244     restore first blink.  */
3245  early_blink_restore = restore_blink && !optimize_size && offs
3246    && !ARC_INTERRUPT_P (fn_type);
3247  if (early_blink_restore)
3248    {
3249      rtx addr = plus_constant (Pmode, stack_pointer_rtx, offs);
3250      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3251      rtx insn = frame_move_inc (reg, gen_frame_mem (Pmode, addr),
3252				 stack_pointer_rtx, NULL_RTX);
3253      add_reg_note (insn, REG_CFA_RESTORE, reg);
3254      restore_blink = false;
3255    }
3256
3257  /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
3258  if (gmask)
3259    for (i = 0; i <= GMASK_LEN; i++)
3260      {
3261	machine_mode restore_mode = SImode;
3262
3263	if (TARGET_LL64
3264	    && ((i % 2) == 0)
3265	    && ((gmask & (1ULL << i)) != 0)
3266	    && ((gmask & (1ULL << (i + 1))) != 0))
3267	  restore_mode = DImode;
3268	else if ((gmask & (1ULL << i)) == 0)
3269	  continue;
3270
3271	reg = gen_rtx_REG (restore_mode, i);
3272	offs = 0;
3273	switch (restore_mode)
3274	  {
3275	  case E_DImode:
3276	    if ((GMASK_LEN - __builtin_clzll (gmask)) == (i + 1)
3277		&& early_blink_restore)
3278	      offs = 4;
3279	    break;
3280	  case E_SImode:
3281	    if ((GMASK_LEN - __builtin_clzll (gmask)) == i
3282		&& early_blink_restore)
3283	      offs = 4;
3284	    break;
3285	  default:
3286	    offs = 0;
3287	  }
3288	frame_deallocated += frame_restore_reg (reg, offs);
3289	offset = 0;
3290
3291	if (restore_mode == DImode)
3292	  i++;
3293      }
3294
3295  if (restore_blink)
3296    {
3297      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3298      frame_deallocated += frame_restore_reg (reg, allocated
3299					      - frame_deallocated
3300					      /* Consider as well the
3301						 current restored
3302						 register size.  */
3303					      - UNITS_PER_WORD);
3304    }
3305
3306  return frame_deallocated;
3307}
3308
3309/* ARC prologue, save the registers using the enter instruction.  The
3310   enter instruction can also save the $blink (SAVE_BLINK) and $fp
3311   (SAVE_FP) registers.  */
3312
3313static int
3314arc_save_callee_enter (uint64_t gmask,
3315		       bool save_blink,
3316		       bool save_fp,
3317		       HOST_WIDE_INT offset)
3318{
3319  int start_reg = ENTER_LEAVE_START_REG;
3320  int end_reg = ENTER_LEAVE_END_REG;
3321  int regno, indx, off, nregs;
3322  rtx insn, reg, mem;
3323  int frame_allocated = 0;
3324
3325  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3326    regno++;
3327
3328  end_reg = regno - 1;
3329  nregs = end_reg - start_reg + 1;
3330  nregs += save_blink ? 1 : 0;
3331  nregs += save_fp ? 1 : 0;
3332
3333  if (offset)
3334    frame_stack_add (offset);
3335
3336  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + (save_fp ? 1 : 0)
3337						  + 1));
3338  indx = 0;
3339
3340  reg = gen_rtx_SET (stack_pointer_rtx,
3341		     plus_constant (Pmode,
3342				    stack_pointer_rtx,
3343				    -nregs * UNITS_PER_WORD));
3344  RTX_FRAME_RELATED_P (reg) = 1;
3345  XVECEXP (insn, 0, indx++) = reg;
3346  off = nregs * UNITS_PER_WORD;
3347
3348  if (save_blink)
3349    {
3350      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3351      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3352						 stack_pointer_rtx,
3353						 off));
3354      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
3355      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3356      off -= UNITS_PER_WORD;
3357      save_blink = false;
3358    }
3359
3360  for (regno = start_reg;
3361       regno <= end_reg;
3362       regno++, indx++, off -= UNITS_PER_WORD)
3363    {
3364      reg = gen_rtx_REG (SImode, regno);
3365      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3366						  stack_pointer_rtx,
3367						  off));
3368      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
3369      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3370      gmask = gmask & ~(1ULL << regno);
3371    }
3372
3373  if (save_fp)
3374    {
3375      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3376						 stack_pointer_rtx,
3377						 off));
3378      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, hard_frame_pointer_rtx);
3379      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3380      off -= UNITS_PER_WORD;
3381
3382      XVECEXP (insn, 0, indx) = gen_rtx_SET (hard_frame_pointer_rtx,
3383					     stack_pointer_rtx);
3384      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3385      save_fp = false;
3386    }
3387
3388  gcc_assert (off == 0);
3389  insn = frame_insn (insn);
3390
3391  add_reg_note (insn, REG_INC, stack_pointer_rtx);
3392
3393  frame_allocated = nregs * UNITS_PER_WORD;
3394
3395  /* OFFSET is a negative number; subtracting it adds its magnitude.  */
3396  return frame_allocated - offset;
3397}
3398
3399/* ARC epilogue, restore the registers using the leave instruction.  An
3400   initial offset is passed in OFFSET.  Besides restoring a register
3401   range, leave can also restore $blink (RESTORE_BLINK) or $fp
3402   (RESTORE_FP), and can return automatically (RETURN_P).  */
3403
3404static int
3405arc_restore_callee_leave (uint64_t gmask,
3406			  bool restore_blink,
3407			  bool restore_fp,
3408			  bool return_p,
3409			  HOST_WIDE_INT offset)
3410{
3411  int start_reg = ENTER_LEAVE_START_REG;
3412  int end_reg = ENTER_LEAVE_END_REG;
3413  int regno, indx, off, nregs;
3414  rtx insn, reg, mem;
3415  int frame_allocated = 0;
3416
3417  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3418    regno++;
3419
3420  end_reg = regno - 1;
3421  nregs = end_reg - start_reg + 1;
3422  nregs += restore_blink ? 1 : 0;
3423  nregs += restore_fp ? 1 : 0;
3424
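  /* Build the PARALLEL matched by the leave pattern.  Its shape is
     roughly:

       (parallel [(return)                      ; if RETURN_P
                  (set (reg sp) (plus (reg sp) (const_int adjust)))
                  (set (reg blink) (mem ...))   ; if RESTORE_BLINK
                  (set (reg r13) (mem ...))     ; the register range
                  ...
                  (set (reg fp) (mem ...))])    ; if RESTORE_FP
  */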
3425  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1
3426						  + (return_p ? 1 : 0)));
3427  indx = 0;
3428
3429  if (return_p)
3430    XVECEXP (insn, 0, indx++) = ret_rtx;
3431
3432  if (restore_fp)
3433    {
      /* We cannot emit a (set (sp) (fp)) here as cselib expects a single
         sp set and not two.  Thus, fold the offset into the sp adjust
         value below.  */
3437      frame_allocated += offset;
3438    }
3439
3440  if (offset && !restore_fp)
3441    {
      /* This add is only emitted when we do not restore fp with the
         leave instruction.  */
3444      frame_stack_add (offset);
3445      frame_allocated += offset;
3446      offset = 0;
3447    }
3448
3449  reg = gen_rtx_SET (stack_pointer_rtx,
3450		     plus_constant (Pmode,
3451				    stack_pointer_rtx,
3452				    offset + nregs * UNITS_PER_WORD));
3453  RTX_FRAME_RELATED_P (reg) = 1;
3454  XVECEXP (insn, 0, indx++) = reg;
3455  off = nregs * UNITS_PER_WORD;
3456
3457  if (restore_blink)
3458    {
3459      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3460      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3461						 stack_pointer_rtx,
3462						 off));
3463      XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
3464      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3465      off -= UNITS_PER_WORD;
3466    }
3467
3468  for (regno = start_reg;
3469       regno <= end_reg;
3470       regno++, indx++, off -= UNITS_PER_WORD)
3471    {
3472      reg = gen_rtx_REG (SImode, regno);
3473      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3474						  stack_pointer_rtx,
3475						  off));
3476      XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
3477      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3478      gmask = gmask & ~(1ULL << regno);
3479    }
3480
3481  if (restore_fp)
3482    {
3483      mem = gen_frame_mem (Pmode, plus_constant (Pmode,
3484						 stack_pointer_rtx,
3485						 off));
3486      XVECEXP (insn, 0, indx) = gen_rtx_SET (hard_frame_pointer_rtx, mem);
3487      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
3488      off -= UNITS_PER_WORD;
3489    }
3490
3491  gcc_assert (off == 0);
3492  if (return_p)
3493    {
3494      insn = emit_jump_insn (insn);
3495      RTX_FRAME_RELATED_P (insn) = 1;
3496    }
3497  else
3498    insn = frame_insn (insn);
3499
3500  add_reg_note (insn, REG_INC, stack_pointer_rtx);
3501
3502  /* Dwarf related info.  */
3503  if (restore_fp)
3504    {
3505      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
3506      add_reg_note (insn, REG_CFA_DEF_CFA,
3507		    plus_constant (Pmode, stack_pointer_rtx,
3508				   offset + nregs * UNITS_PER_WORD));
3509    }
3510  else
3511    {
3512      add_reg_note (insn, REG_CFA_ADJUST_CFA,
3513		    gen_rtx_SET (stack_pointer_rtx,
3514				 plus_constant (Pmode, stack_pointer_rtx,
3515						nregs * UNITS_PER_WORD)));
3516    }
3517  if (restore_blink)
3518    add_reg_note (insn, REG_CFA_RESTORE,
3519		  gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
3520  for (regno = start_reg; regno <= end_reg; regno++)
3521    add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, regno));
3522
3523  frame_allocated += nregs * UNITS_PER_WORD;
3524
3525  return frame_allocated;
3526}
3527
/* Millicode thunks implementation:
   Generates calls to millicode routines for registers in the range r13
   to r25.
   Present Limitations:
   - Only one range supported.  The remaining regs will have the ordinary
   st and ld instructions for stores and loads.  Hence a gmask asking
   to store r13-14, r16-r25 will only generate calls to store and
   load r13 to r14 while store and load insns will be generated for
   r16 to r25 in the prologue and epilogue respectively.

   - Presently the library only supports register ranges starting from r13.
*/
3539
3540static int
3541arc_save_callee_milli (uint64_t gmask,
3542		       bool save_blink,
3543		       bool save_fp,
3544		       HOST_WIDE_INT offset,
3545		       HOST_WIDE_INT reg_size)
3546{
3547  int start_reg = 13;
3548  int end_reg = 25;
3549  int regno, indx, off, nregs;
3550  rtx insn, reg, mem;
3551  int frame_allocated = 0;
3552
3553  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3554    regno++;
3555
3556  end_reg = regno - 1;
3557  nregs = end_reg - start_reg + 1;
  gcc_assert (end_reg > 14);

3561  /* Allocate space on stack for the registers, and take into account
3562     also the initial offset.  The registers will be saved using
3563     offsets.  N.B. OFFSET is a negative number.  */
3564  if (save_blink)
3565    {
3566      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3567      frame_allocated += frame_save_reg (reg, offset);
3568      offset = 0;
3569    }
3570
3571  if (reg_size || offset)
3572    {
3573      frame_stack_add (offset - reg_size);
3574      frame_allocated += nregs * UNITS_PER_WORD - offset;
3575      offset = 0;
3576    }
3577
  /* Start generating the millicode call.  */
3579  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
3580  indx = 0;
3581
3582  /* This is a call, we clobber blink.  */
3583  XVECEXP (insn, 0, nregs) =
3584    gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
3585
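  /* One (set (mem ...) (reg)) per register stored by the millicode
     routine; element NREGS of the vector is the blink clobber set up
     above.  */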
3586  for (regno = start_reg, indx = 0, off = 0;
3587       regno <= end_reg;
3588       regno++, indx++, off += UNITS_PER_WORD)
3589    {
3590      reg = gen_rtx_REG (SImode, regno);
3591      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3592						  stack_pointer_rtx,
3593						  off));
3594      XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
3595      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3596      gmask = gmask & ~(1ULL << regno);
3597    }
3598  insn = frame_insn (insn);
3599
3600  /* Add DWARF info.  */
3601  for (regno = start_reg, off = 0;
3602       regno <= end_reg;
3603       regno++, off += UNITS_PER_WORD)
3604    {
3605      reg = gen_rtx_REG (SImode, regno);
3606      mem = gen_rtx_MEM (SImode, plus_constant (Pmode,
3607						stack_pointer_rtx, off));
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
3611
  /* In the case of a millicode thunk, we need to restore the
     clobbered blink register.  */
3614  if (arc_must_save_return_addr (cfun))
3615    {
3616      emit_insn (gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
3617			      gen_rtx_MEM (Pmode,
3618					   plus_constant (Pmode,
3619							  stack_pointer_rtx,
3620							  reg_size))));
3621    }
3622
3623  /* Save remaining registers using st instructions.  */
3624  for (regno = 0; regno <= GMASK_LEN; regno++)
3625    {
3626      if ((gmask & (1ULL << regno)) == 0)
3627	continue;
3628
3629      reg = gen_rtx_REG (SImode, regno);
3630      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3631						  stack_pointer_rtx,
3632						  off));
3633      frame_move_inc (mem, reg, stack_pointer_rtx, 0);
3634      frame_allocated += UNITS_PER_WORD;
3635      off += UNITS_PER_WORD;
3636    }
3637
  /* Save frame pointer if needed.  First save the FP on stack, if not
     autosaved.  Unfortunately, we cannot add it to gmask and use the
     above loop to save fp because our ABI states fp goes after all
     registers are saved.  */
3642  if (save_fp)
3643    frame_allocated += frame_save_reg (hard_frame_pointer_rtx, offset);
3644
3645  /* Emit mov fp,sp.  */
3646  if (arc_frame_pointer_needed ())
3647    frame_move (hard_frame_pointer_rtx, stack_pointer_rtx);
3648
3649  return frame_allocated;
3650}
3651
3652/* Like the previous function but restore.  */
3653
3654static int
3655arc_restore_callee_milli (uint64_t gmask,
3656			  bool restore_blink,
3657			  bool restore_fp,
3658			  bool return_p,
3659			  HOST_WIDE_INT offset)
3660{
3661  int start_reg = 13;
3662  int end_reg = 25;
3663  int regno, indx, off, nregs;
3664  rtx insn, reg, mem;
3665  int frame_allocated = 0;
3666
3667  for (regno = start_reg; regno <= end_reg && (gmask & (1ULL << regno));)
3668    regno++;
3669
3670  end_reg = regno - 1;
3671  nregs = end_reg - start_reg + 1;
3672  gcc_assert (end_reg > 14);
3673
3674  /* Emit mov fp,sp.  */
3675  if (arc_frame_pointer_needed () && offset)
3676    {
3677      frame_move (stack_pointer_rtx, hard_frame_pointer_rtx);
3678      frame_allocated = offset;
3679      offset = 0;
3680    }
3681
3682  if (restore_fp)
3683    frame_allocated += frame_restore_reg (hard_frame_pointer_rtx, 0);
3684
3685  if (offset)
3686    {
3687      /* No fp involved, hence, we need to adjust the sp via an
3688	 add.  */
3689      frame_stack_add (offset);
3690      frame_allocated += offset;
3691      offset = 0;
3692    }
3693
  /* Start generating the millicode call.  */
3695  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc ((return_p ? 1 : 0)
3696						  + nregs + 1));
3697  indx = 0;
3698
3699  if (return_p)
3700    {
      /* Sibling call: the blink is restored with the help of the
         value held in r12.  */
3703      reg = gen_rtx_REG (Pmode, 12);
3704      XVECEXP (insn, 0, indx++) = ret_rtx;
3705      XVECEXP (insn, 0, indx++) =
3706	gen_rtx_SET (stack_pointer_rtx,
3707		     gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg));
3708      frame_allocated += UNITS_PER_WORD;
3709    }
3710  else
3711    {
3712      /* This is a call, we clobber blink.  */
3713      XVECEXP (insn, 0, nregs) =
3714	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
3715    }
3716
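  /* One (set (reg) (mem ...)) per register reloaded by the millicode
     routine.  */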
3717  for (regno = start_reg, off = 0;
3718       regno <= end_reg;
3719       regno++, indx++, off += UNITS_PER_WORD)
3720    {
3721      reg = gen_rtx_REG (SImode, regno);
3722      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3723						  stack_pointer_rtx,
3724						  off));
3725      XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
3726      RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
3727      gmask = gmask & ~(1ULL << regno);
3728    }
3729
3730  /* Restore remaining registers using LD instructions.  */
3731  for (regno = 0; regno <= GMASK_LEN; regno++)
3732    {
3733      if ((gmask & (1ULL << regno)) == 0)
3734	continue;
3735
3736      reg = gen_rtx_REG (SImode, regno);
3737      mem = gen_frame_mem (SImode, plus_constant (Pmode,
3738						  stack_pointer_rtx,
3739						  off));
3740      rtx tmp = frame_move_inc (reg, mem, stack_pointer_rtx, 0);
3741      add_reg_note (tmp, REG_CFA_RESTORE, reg);
3742      off += UNITS_PER_WORD;
3743    }
3744
3745  /* Emit millicode call.  */
3746  if (return_p)
3747    {
3748      reg = gen_rtx_REG (Pmode, 12);
3749      frame_insn (gen_rtx_SET (reg, GEN_INT (off)));
3750      frame_allocated += off;
3751      insn = emit_jump_insn (insn);
3752      RTX_FRAME_RELATED_P (insn) = 1;
3753    }
3754  else
3755    insn = frame_insn (insn);
3756
3757  /* Add DWARF info.  */
3758  for (regno = start_reg; regno <= end_reg; regno++)
3759    {
3760      reg = gen_rtx_REG (SImode, regno);
      add_reg_note (insn, REG_CFA_RESTORE, reg);
    }
3764
3765  if (restore_blink && !return_p)
3766    {
3767      reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3768      mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
3769						 off));
3770      insn = frame_insn (gen_rtx_SET (reg, mem));
3771      add_reg_note (insn, REG_CFA_RESTORE, reg);
3772    }
3773
3774  return frame_allocated;
3775}
3776
3777/* Set up the stack and frame pointer (if desired) for the function.  */
3778
3779void
3780arc_expand_prologue (void)
3781{
3782  int size;
3783  uint64_t gmask = cfun->machine->frame_info.gmask;
3784  struct arc_frame_info *frame = &cfun->machine->frame_info;
3785  unsigned int frame_size_to_allocate;
3786  int first_offset = 0;
3787  unsigned int fn_type = arc_compute_function_type (cfun);
3788  bool save_blink = false;
3789  bool save_fp = false;
3790  bool emit_move = false;
3791
  /* Naked functions don't have a prologue.  */
3793  if (ARC_NAKED_P (fn_type))
3794    {
3795      if (flag_stack_usage_info)
3796	current_function_static_stack_size = 0;
3797      return;
3798    }
3799
3800  /* Compute total frame size.  */
3801  size = arc_compute_frame_size ();
3802
3803  if (flag_stack_usage_info)
3804    current_function_static_stack_size = size;
3805
3806  /* Keep track of frame size to be allocated.  */
3807  frame_size_to_allocate = size;
3808
3809  /* These cases shouldn't happen.  Catch them now.  */
3810  gcc_assert (!(size == 0 && gmask));
3811
3812  /* Allocate space for register arguments if this is a variadic function.  */
3813  if (frame->pretend_size != 0)
3814    first_offset = -frame->pretend_size;
3815
  /* An IRQ using the automatic save mechanism will save the registers
     before anything we do.  */
3818  if (ARC_AUTO_IRQ_P (fn_type)
3819      && !ARC_FAST_INTERRUPT_P (fn_type))
3820    {
3821      frame_stack_add (first_offset);
3822      first_offset = 0;
3823      arc_dwarf_emit_irq_save_regs ();
3824    }
3825
3826  save_blink = arc_must_save_return_addr (cfun)
3827    && !ARC_AUTOBLINK_IRQ_P (fn_type);
3828  save_fp = arc_frame_pointer_needed () && !ARC_AUTOFP_IRQ_P (fn_type)
3829    && !ARC_INTERRUPT_P (fn_type);
3830  emit_move = arc_frame_pointer_needed () && !ARC_INTERRUPT_P (fn_type);
3831
3832  /* Use enter/leave only for non-interrupt functions.  */
3833  if (TARGET_CODE_DENSITY
3834      && TARGET_CODE_DENSITY_FRAME
3835      && !ARC_AUTOFP_IRQ_P (fn_type)
3836      && !ARC_AUTOBLINK_IRQ_P (fn_type)
3837      && !ARC_INTERRUPT_P (fn_type)
3838      && arc_enter_leave_p (gmask))
3839      frame_size_to_allocate -= arc_save_callee_enter (gmask, save_blink,
3840						       save_fp,
3841						       first_offset);
3842  else if (frame->millicode_end_reg > 14)
3843    frame_size_to_allocate -= arc_save_callee_milli (gmask, save_blink,
3844						     save_fp,
3845						     first_offset,
3846						     frame->reg_size);
3847  else
3848    frame_size_to_allocate -= arc_save_callee_saves (gmask, save_blink, save_fp,
3849						     first_offset, emit_move);
3850
3851  /* Check if we need to save the ZOL machinery.  */
3852  if (arc_lpcwidth != 0 && arc_must_save_register (LP_COUNT, cfun, true))
3853    {
3854      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
3855      emit_insn (gen_rtx_SET (reg0,
3856			      gen_rtx_UNSPEC_VOLATILE
3857			      (Pmode, gen_rtvec (1, GEN_INT (AUX_LP_START)),
3858			       VUNSPEC_ARC_LR)));
3859      frame_size_to_allocate -= push_reg (reg0);
3860      emit_insn (gen_rtx_SET (reg0,
3861			      gen_rtx_UNSPEC_VOLATILE
3862			      (Pmode, gen_rtvec (1, GEN_INT (AUX_LP_END)),
3863			       VUNSPEC_ARC_LR)));
3864      frame_size_to_allocate -= push_reg (reg0);
3865      emit_move_insn (reg0, gen_rtx_REG (SImode, LP_COUNT));
3866      frame_size_to_allocate -= push_reg (reg0);
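      /* The epilogue pops these in the reverse order: lp_count first,
         then lp_end, then lp_start.  */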
3867    }
3868
3869  /* Save AUX regs used by FPX machinery.  */
3870  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R41_REG : R40_REG,
3871			      cfun, TARGET_DPFP))
3872    {
3873      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
3874      int i;
3875
3876      for (i = 0; i < 4; i++)
3877	{
3878	  emit_insn (gen_rtx_SET (reg0,
3879				  gen_rtx_UNSPEC_VOLATILE
3880				  (Pmode, gen_rtvec (1, GEN_INT (AUX_DPFP_START
3881								 + i)),
3882				   VUNSPEC_ARC_LR)));
3883	  frame_size_to_allocate -= push_reg (reg0);
3884	}
3885    }
3886
  /* Save the accumulator registers (r58/r59); the 3ULL << 58 mask
     selects both of them.  */
3888  if (arc_must_save_register (R58_REG, cfun, true))
3889    frame_size_to_allocate -= arc_save_callee_saves (3ULL << 58,
3890						     false, false, 0, false);
3891
3892  if (arc_frame_pointer_needed () && ARC_INTERRUPT_P (fn_type))
3893    {
3894      /* Just save fp at the end of the saving context.  */
3895      frame_size_to_allocate -=
3896	arc_save_callee_saves (0, false, !ARC_AUTOFP_IRQ_P (fn_type), 0, true);
3897    }
3898
3899  /* Allocate the stack frame.  */
3900  if (frame_size_to_allocate > 0)
3901    frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
3902
3903  /* Emit a blockage to avoid delay slot scheduling.  */
3904  emit_insn (gen_blockage ());
3905}
3906
3907/* Return the register number of the register holding the return address
3908   for a function of type TYPE.  */
3909
3910static int
3911arc_return_address_register (unsigned int fn_type)
3912{
3913  int regno = 0;
3914
3915  if (ARC_INTERRUPT_P (fn_type))
3916    {
3917      if ((fn_type & (ARC_FUNCTION_ILINK1 | ARC_FUNCTION_FIRQ)) != 0)
3918	regno = ILINK1_REG;
3919      else if ((fn_type & ARC_FUNCTION_ILINK2) != 0)
3920	regno = ILINK2_REG;
3921      else
3922	gcc_unreachable ();
3923    }
3924  else if (ARC_NORMAL_P (fn_type) || ARC_NAKED_P (fn_type))
3925    regno = RETURN_ADDR_REGNUM;
3926
3927  gcc_assert (regno != 0);
3928  return regno;
3929}
3930
3931/* Do any necessary cleanup after a function to restore stack, frame,
3932   and regs.  */
3933
3934void
3935arc_expand_epilogue (int sibcall_p)
3936{
3937  int size;
3938  unsigned int fn_type = arc_compute_function_type (cfun);
3939  unsigned int size_to_deallocate;
3940  int restored;
3941  int can_trust_sp_p = !cfun->calls_alloca;
3942  int first_offset;
3943  bool restore_fp = arc_frame_pointer_needed () && !ARC_AUTOFP_IRQ_P (fn_type);
3944  bool restore_blink = arc_must_save_return_addr (cfun)
3945    && !ARC_AUTOBLINK_IRQ_P (fn_type);
3946  uint64_t gmask = cfun->machine->frame_info.gmask;
3947  bool return_p = !sibcall_p && fn_type == ARC_FUNCTION_NORMAL
3948		   && !cfun->machine->frame_info.pretend_size;
3949  struct arc_frame_info *frame = &cfun->machine->frame_info;
3950
  /* Naked functions don't have an epilogue.  */
3952  if (ARC_NAKED_P (fn_type))
3953    return;
3954
3955  size = arc_compute_frame_size ();
3956  size_to_deallocate = size;
3957
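  /* FIRST_OFFSET is the part of the frame below the register save area
     (essentially local variables and outgoing arguments); it has to be
     deallocated before the saved registers can be reached.  */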
3958  first_offset = size - (frame->pretend_size + frame->reg_size
3959			 + frame->extra_size);
3960
3961  if (!can_trust_sp_p)
3962    gcc_assert (arc_frame_pointer_needed ());
3963
3964  /* Emit a blockage to avoid/flush all pending sp operations.  */
3965  if (size)
3966    emit_insn (gen_blockage ());
3967
3968  if (ARC_INTERRUPT_P (fn_type) && restore_fp)
3969    {
3970      /* We need to restore FP before any SP operation in an
3971	 interrupt.  */
3972      size_to_deallocate -= arc_restore_callee_saves (0, false,
3973						      restore_fp,
3974						      first_offset,
3975						      size_to_deallocate);
3976      restore_fp = false;
3977      first_offset = 0;
3978    }
3979
3980  /* Restore accumulator registers.  */
3981  if (arc_must_save_register (R58_REG, cfun, true))
3982    {
3983      rtx insn;
3984      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
3985      rtx reg1 = gen_rtx_REG (SImode, R1_REG);
3986      size_to_deallocate -= pop_reg (reg0);
3987      size_to_deallocate -= pop_reg (reg1);
3988
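      /* Reload the accumulator via the multiply unit: the multiply by
         one below writes the saved low word back into the accumulator,
         and the SR to the MULHI aux register restores the high
         word.  */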
3989      insn = emit_insn (gen_mulu64 (reg0, const1_rtx));
3990      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, R58_REG));
3991      RTX_FRAME_RELATED_P (insn) = 1;
3992      emit_insn (gen_arc600_stall ());
3993      insn = emit_insn (gen_rtx_UNSPEC_VOLATILE
3994			(VOIDmode, gen_rtvec (2, reg1, GEN_INT (AUX_MULHI)),
3995			 VUNSPEC_ARC_SR));
3996      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, R59_REG));
3997      RTX_FRAME_RELATED_P (insn) = 1;
3998    }
3999
4000  /* Restore AUX-regs used by FPX machinery.  */
4001  if (arc_must_save_register (TARGET_BIG_ENDIAN ? R41_REG : R40_REG,
4002			      cfun, TARGET_DPFP))
4003    {
4004      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
4005      int i;
4006
4007      for (i = 0; i < 4; i++)
4008	{
4009	  size_to_deallocate -= pop_reg (reg0);
4010	  emit_insn (gen_rtx_UNSPEC_VOLATILE
4011		     (VOIDmode, gen_rtvec (2, reg0, GEN_INT (AUX_DPFP_START
4012							     + i)),
4013		      VUNSPEC_ARC_SR));
4014	}
4015    }
4016
4017  /* Check if we need to restore the ZOL machinery.  */
  if (arc_lpcwidth != 0 && arc_must_save_register (LP_COUNT, cfun, true))
4019    {
4020      rtx reg0 = gen_rtx_REG (SImode, R0_REG);
4021
4022      size_to_deallocate -= pop_reg (reg0);
4023      emit_move_insn (gen_rtx_REG (SImode, LP_COUNT), reg0);
4024
4025      size_to_deallocate -= pop_reg (reg0);
4026      emit_insn (gen_rtx_UNSPEC_VOLATILE
4027		 (VOIDmode, gen_rtvec (2, reg0, GEN_INT (AUX_LP_END)),
4028		  VUNSPEC_ARC_SR));
4029
4030      size_to_deallocate -= pop_reg (reg0);
4031      emit_insn (gen_rtx_UNSPEC_VOLATILE
4032		 (VOIDmode, gen_rtvec (2, reg0, GEN_INT (AUX_LP_START)),
4033		  VUNSPEC_ARC_SR));
4034    }
4035
4036  if (TARGET_CODE_DENSITY
4037      && TARGET_CODE_DENSITY_FRAME
4038      && !ARC_AUTOFP_IRQ_P (fn_type)
4039      && !ARC_AUTOBLINK_IRQ_P (fn_type)
4040      && !ARC_INTERRUPT_P (fn_type)
4041      && arc_enter_leave_p (gmask))
4042    {
      /* Using the leave instruction.  */
4044      size_to_deallocate -= arc_restore_callee_leave (gmask, restore_blink,
4045						      restore_fp,
4046						      return_p,
4047						      first_offset);
4048      if (return_p)
4049	{
4050	  gcc_assert (size_to_deallocate == 0);
4051	  return;
4052	}
4053    }
4054  else if (frame->millicode_end_reg > 14)
4055    {
4056      /* Using millicode calls.  */
4057      size_to_deallocate -= arc_restore_callee_milli (gmask, restore_blink,
4058						      restore_fp,
4059						      return_p,
4060						      first_offset);
4061      if (return_p)
4062	{
4063	  gcc_assert (size_to_deallocate == 0);
4064	  return;
4065	}
4066    }
4067  else
4068    size_to_deallocate -= arc_restore_callee_saves (gmask, restore_blink,
4069						    restore_fp,
4070						    first_offset,
4071						    size_to_deallocate);
4072
4073  /* Keep track of how much of the stack pointer we've restored.  It
4074     makes the following a lot more readable.  */
4075  restored = size - size_to_deallocate;
4076
4077  if (size > restored)
4078    frame_stack_add (size - restored);
4079
4080  /* For frames that use __builtin_eh_return, the register defined by
4081     EH_RETURN_STACKADJ_RTX is set to 0 for all standard return paths.
4082     On eh_return paths however, the register is set to the value that
4083     should be added to the stack pointer in order to restore the
4084     correct stack pointer for the exception handling frame.
4085
4086     For ARC we are going to use r2 for EH_RETURN_STACKADJ_RTX, add
4087     this onto the stack for eh_return frames.  */
4088  if (crtl->calls_eh_return)
4089    emit_insn (gen_add2_insn (stack_pointer_rtx,
4090			      EH_RETURN_STACKADJ_RTX));
4091
4092  /* Emit the return instruction.  */
4093  if (ARC_INTERRUPT_P (fn_type))
4094    {
4095      rtx ra = gen_rtx_REG (Pmode, arc_return_address_register (fn_type));
4096
4097      if (TARGET_V2)
4098	emit_jump_insn (gen_rtie ());
4099      else if (TARGET_ARC700)
4100	emit_jump_insn (gen_rtie ());
4101      else
4102	emit_jump_insn (gen_arc600_rtie (ra));
4103    }
4104  else if (sibcall_p == FALSE)
4105    emit_jump_insn (gen_simple_return ());
4106}
4107
/* Helper for {push/pop}_multi_operand: check if rtx OP is a suitable
   construct to match either the enter or the leave instruction.  Which
   one is selected by the PUSH_P argument.  */
4111
4112bool
4113arc_check_multi (rtx op, bool push_p)
4114{
4115  HOST_WIDE_INT len = XVECLEN (op, 0);
4116  unsigned int regno, i, start;
4117  unsigned int memp = push_p ? 0 : 1;
4118  rtx elt;
4119
4120  if (len <= 1)
4121    return false;
4122
4123  start = 1;
4124  elt = XVECEXP (op, 0, 0);
4125  if (!push_p && GET_CODE (elt) == RETURN)
4126    start = 2;
4127
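  /* Apart from the leading sp adjustment (and the optional return),
     every element must be a reg/mem SET: blink first if present, then
     a contiguous register range starting at r13, and finally fp.  In
     the enter form the fp store is followed by the fp := sp set, which
     is skipped via the ++i below.  */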
4128  for (i = start, regno = ENTER_LEAVE_START_REG; i < len; i++, regno++)
4129    {
4130      rtx elt = XVECEXP (op, 0, i);
4131      rtx reg, mem, addr;
4132
4133      if (GET_CODE (elt) != SET)
4134	return false;
4135      mem = XEXP (elt, memp);
4136      reg = XEXP (elt, 1 - memp);
4137
4138      if (!REG_P (reg)
4139	  || !MEM_P (mem))
4140	return false;
4141
4142      /* Check for blink.  */
4143      if (REGNO (reg) == RETURN_ADDR_REGNUM
4144	  && i == start)
4145	regno = 12;
4146      else if (REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
4147	++i;
4148      else if (REGNO (reg) != regno)
4149	return false;
4150
4151      addr = XEXP (mem, 0);
4152      if (GET_CODE (addr) == PLUS)
4153	{
4154	  if (!rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
4155	      || !CONST_INT_P (XEXP (addr, 1)))
4156	    return false;
4157	}
4158      else
4159	{
4160	  if (!rtx_equal_p (stack_pointer_rtx, addr))
4161	    return false;
4162	}
4163    }
4164  return true;
4165}
4166
4167/* Return rtx for the location of the return address on the stack,
4168   suitable for use in __builtin_eh_return.  The new return address
4169   will be written to this location in order to redirect the return to
   the exception handler.  Our ABI says the blink is pushed first on
   the stack, followed by an unknown number of register saves, and
   finally by fp.  Hence we cannot use the EH_RETURN_ADDRESS macro as
   the stack is not finalized.  */
4174
4175void
4176arc_eh_return_address_location (rtx source)
4177{
4178  rtx mem;
4179  int offset;
4180  struct arc_frame_info *afi;
4181
4182  arc_compute_frame_size ();
4183  afi = &cfun->machine->frame_info;
4184
4185  gcc_assert (crtl->calls_eh_return);
4186  gcc_assert (afi->save_return_addr);
4187  gcc_assert (afi->extra_size >= 4);
4188
4189  /* The '-4' removes the size of the return address, which is
4190     included in the 'extra_size' field.  */
4191  offset = afi->reg_size + afi->extra_size - 4;
4192  mem = gen_frame_mem (Pmode,
4193		       plus_constant (Pmode, hard_frame_pointer_rtx, offset));
4194
4195  /* The following should not be needed, and is, really a hack.  The
4196     issue being worked around here is that the DSE (Dead Store
4197     Elimination) pass will remove this write to the stack as it sees
4198     a single store and no corresponding read.  The read however
4199     occurs in the epilogue code, which is not added into the function
4200     rtl until a later pass.  So, at the time of DSE, the decision to
4201     remove this store seems perfectly sensible.  Marking the memory
4202     address as volatile obviously has the effect of preventing DSE
4203     from removing the store.  */
4204  MEM_VOLATILE_P (mem) = true;
4205  emit_move_insn (mem, source);
4206}
4207
4208/* PIC */
4209
4210/* Helper to generate unspec constant.  */
4211
4212static rtx
4213arc_unspec_offset (rtx loc, int unspec)
4214{
4215  return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, loc),
4216					       unspec));
4217}
4218
4219/* !TARGET_BARREL_SHIFTER support.  */
4220/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
4221   kind of shift.  */
4222
4223void
4224emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
4225{
4226  rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
4227  rtx pat
4228    = ((shift4_operator (shift, SImode) ?  gen_shift_si3 : gen_shift_si3_loop)
4229	(op0, op1, op2, shift));
4230  emit_insn (pat);
4231}
4232
4233/* Output the assembler code for doing a shift.
4234   We go to a bit of trouble to generate efficient code as the ARC601 only has
4235   single bit shifts.  This is taken from the h8300 port.  We only have one
4236   mode of shifting and can't access individual bytes like the h8300 can, so
4237   this is greatly simplified (at the expense of not generating hyper-
4238   efficient code).
4239
4240   This function is not used if the variable shift insns are present.  */
4241
4242/* FIXME:  This probably can be done using a define_split in arc.md.
4243   Alternately, generate rtx rather than output instructions.  */
4244
4245const char *
4246output_shift (rtx *operands)
4247{
4248  /*  static int loopend_lab;*/
4249  rtx shift = operands[3];
4250  machine_mode mode = GET_MODE (shift);
4251  enum rtx_code code = GET_CODE (shift);
4252  const char *shift_one;
4253
4254  gcc_assert (mode == SImode);
4255
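  /* SHIFT_ONE is the instruction performing the requested shift by a
     single bit; a left shift by one is simply an add.  */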
4256  switch (code)
4257    {
4258    case ASHIFT:   shift_one = "add %0,%1,%1"; break;
4259    case ASHIFTRT: shift_one = "asr %0,%1"; break;
4260    case LSHIFTRT: shift_one = "lsr %0,%1"; break;
4261    default:       gcc_unreachable ();
4262    }
4263
4264  if (GET_CODE (operands[2]) != CONST_INT)
4265    {
4266      output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
4267      goto shiftloop;
4268    }
4269  else
4270    {
4271      int n;
4272
4273      n = INTVAL (operands[2]);
4274
4275      /* Only consider the lower 5 bits of the shift count.  */
4276      n = n & 0x1f;
4277
4278      /* First see if we can do them inline.  */
4279      /* ??? We could get better scheduling & shorter code (using short insns)
4280	 by using splitters.  Alas, that'd be even more verbose.  */
4281      if (code == ASHIFT && n <= 9 && n > 2
4282	  && dest_reg_operand (operands[4], SImode))
4283	{
4284	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
          for (n -= 3; n >= 3; n -= 3)
4286	    output_asm_insn ("add3 %0,%4,%0", operands);
4287	  if (n == 2)
4288	    output_asm_insn ("add2 %0,%4,%0", operands);
4289	  else if (n)
4290	    output_asm_insn ("add %0,%0,%0", operands);
4291	}
4292      else if (n <= 4)
4293	{
4294	  while (--n >= 0)
4295	    {
4296	      output_asm_insn (shift_one, operands);
4297	      operands[1] = operands[0];
4298	    }
4299	}
4300      /* See if we can use a rotate/and.  */
4301      else if (n == BITS_PER_WORD - 1)
4302	{
4303	  switch (code)
4304	    {
4305	    case ASHIFT :
4306	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
4307	      break;
4308	    case ASHIFTRT :
4309	      /* The ARC doesn't have a rol insn.  Use something else.  */
4310	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
4311	      break;
4312	    case LSHIFTRT :
4313	      /* The ARC doesn't have a rol insn.  Use something else.  */
4314	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
4315	      break;
4316	    default:
4317	      break;
4318	    }
4319	}
4320      else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
4321	{
4322	  switch (code)
4323	    {
4324	    case ASHIFT :
4325	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
4326	      break;
4327	    case ASHIFTRT :
4328#if 1 /* Need some scheduling comparisons.  */
4329	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
4330			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
4331#else
4332	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
4333			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
4334#endif
4335	      break;
4336	    case LSHIFTRT :
4337#if 1
4338	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
4339			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
4340#else
4341	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
4342			       "and %0,%0,1\n\trlc %0,%0", operands);
4343#endif
4344	      break;
4345	    default:
4346	      break;
4347	    }
4348	}
4349      else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
4350	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
4351			 operands);
4352      /* Must loop.  */
4353      else
4354	{
4355	  operands[2] = GEN_INT (n);
4356	  output_asm_insn ("mov.f lp_count, %2", operands);
4357
4358	shiftloop:
4359	    {
4360	      output_asm_insn ("lpnz\t2f", operands);
4361	      output_asm_insn (shift_one, operands);
4362	      output_asm_insn ("nop", operands);
4363	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
4364		       ASM_COMMENT_START);
4365	    }
4366	}
4367    }
4368
4369  return "";
4370}
4371
4372/* Nested function support.  */
4373
4374/* Output assembler code for a block containing the constant parts of
4375   a trampoline, leaving space for variable parts.  A trampoline looks
4376   like this:
4377
4378   ld_s r12,[pcl,8]
4379   ld   r11,[pcl,12]
4380   j_s [r12]
4381   .word function's address
4382   .word static chain value
4383
4384*/
4385
4386static void
4387arc_asm_trampoline_template (FILE *f)
4388{
4389  asm_fprintf (f, "\tld_s\t%s,[pcl,8]\n", ARC_TEMP_SCRATCH_REG);
4390  asm_fprintf (f, "\tld\t%s,[pcl,12]\n", reg_names[STATIC_CHAIN_REGNUM]);
4391  asm_fprintf (f, "\tj_s\t[%s]\n", ARC_TEMP_SCRATCH_REG);
4392  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4393  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4394}
4395
4396/* Emit RTL insns to initialize the variable parts of a trampoline.
4397   FNADDR is an RTX for the address of the function's pure code.  CXT
4398   is an RTX for the static chain value for the function.
4399
   The fastest trampoline to execute for trampolines within +-8KB of CXT
   would be:

   add2 r11,pcl,s12
   j [limm]           0x20200f80 limm

   and that would also be faster to write to the stack by computing
   the offset from CXT to TRAMP at compile time.  However, it would
   really be better to get rid of the high cost of cache invalidation
   when generating trampolines, which requires that the code part of
   trampolines stays constant, and additionally either making sure
   that no executable code but trampolines is on the stack and that no
   icache entries linger for the area of the stack from before the
   stack was allocated, and allocating trampolines in trampoline-only
   cache lines, or allocating trampolines from a special pool of
   pre-allocated trampolines.  */
4416
4417static void
4418arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
4419{
4420  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4421
4422  emit_block_move (tramp, assemble_trampoline_template (),
4423		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
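  /* Offsets 8 and 12 are the two .word slots left empty by
     arc_asm_trampoline_template above: the function address and the
     static chain value, respectively.  */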
4424  emit_move_insn (adjust_address (tramp, SImode, 8), fnaddr);
4425  emit_move_insn (adjust_address (tramp, SImode, 12), cxt);
4426  maybe_emit_call_builtin___clear_cache (XEXP (tramp, 0),
4427					 plus_constant (Pmode,
4428							XEXP (tramp, 0),
4429							TRAMPOLINE_SIZE));
4430}
4431
4432/* Add the given function declaration to emit code in JLI section.  */
4433
4434static void
4435arc_add_jli_section (rtx pat)
4436{
4437  const char *name;
4438  tree attrs;
4439  arc_jli_section *sec = arc_jli_sections, *new_section;
4440  tree decl = SYMBOL_REF_DECL (pat);
4441
4442  if (!pat)
4443    return;
4444
4445  if (decl)
4446    {
4447      /* For fixed locations do not generate the jli table entry.  It
4448	 should be provided by the user as an asm file.  */
4449      attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4450      if (lookup_attribute ("jli_fixed", attrs))
4451	return;
4452    }
4453
4454  name = XSTR (pat, 0);
4455
4456  /* Don't insert the same symbol twice.  */
4457  while (sec != NULL)
4458    {
      if (strcmp (name, sec->name) == 0)
4460	return;
4461      sec = sec->next;
4462    }
4463
4464  /* New name, insert it.  */
4465  new_section = (arc_jli_section *) xmalloc (sizeof (arc_jli_section));
4466  gcc_assert (new_section != NULL);
4467  new_section->name = name;
4468  new_section->next = arc_jli_sections;
4469  arc_jli_sections = new_section;
4470}
4471
/* This is set briefly to 1 when we output a ".as" address modifier, and then
   reset when we output the scaled address.  */
4474static int output_scaled = 0;
4475
4476/* Set when we force sdata output.  */
4477static int output_sdata = 0;
4478
4479/* Print operand X (an rtx) in assembler syntax to file FILE.
4480   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4481   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4482/* In final.cc:output_asm_insn:
4483    'l' : label
4484    'a' : address
4485    'c' : constant address if CONSTANT_ADDRESS_P
4486    'n' : negative
4487   Here:
4488    'Z': log2(x+1)-1
4489    'z': log2
4490    'M': log2(~x)
4491    'p': bit Position of lsb
4492    's': size of bit field
4493    '#': condbranch delay slot suffix
4494    '*': jump delay slot suffix
4495    '?' : nonjump-insn suffix for conditional execution or short instruction
4496    '!' : jump / call suffix for conditional execution or short instruction
    '`': fold constant inside unary operator, re-recognize, and emit.
    'd': condition code of a comparison operand
    'D': inverted condition code of a comparison operand
4500    'R': Second word
4501    'S': JLI instruction
4502    'j': used by mov instruction to properly emit jli related labels.
4503    'B': Branch comparison operand - suppress sda reference
4504    'H': Most significant word
4505    'L': Least significant word
4506    'A': ASCII decimal representation of floating point value
4507    'U': Load/store update or scaling indicator
4508    'V': cache bypass indicator for volatile
4509    'P'
4510    'F'
4511    '^'
4512    'O': Operator
4513    'o': original symbol - no @ prepending.  */
4514
4515void
4516arc_print_operand (FILE *file, rtx x, int code)
4517{
4518  switch (code)
4519    {
4520    case 'Z':
4521      if (GET_CODE (x) == CONST_INT)
4522	fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 );
4523      else
4524	output_operand_lossage ("invalid operand to %%Z code");
4525
4526      return;
4527
4528    case 'z':
4529      if (GET_CODE (x) == CONST_INT)
4530	fprintf (file, "%d",exact_log2 (INTVAL (x) & 0xffffffff));
4531      else
4532	output_operand_lossage ("invalid operand to %%z code");
4533
4534      return;
4535
4536    case 'c':
4537      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4539      else
4540        output_operand_lossage ("invalid operands to %%c code");
4541
4542      return;
4543
4544    case 'M':
4545      if (GET_CODE (x) == CONST_INT)
4546	fprintf (file, "%d",exact_log2(~INTVAL (x)) );
4547      else
4548	output_operand_lossage ("invalid operand to %%M code");
4549
4550      return;
4551
4552    case 'p':
4553      if (GET_CODE (x) == CONST_INT)
4554	fprintf (file, "%d", exact_log2 (INTVAL (x) & -INTVAL (x)));
4555      else
4556	output_operand_lossage ("invalid operand to %%p code");
4557      return;
4558
4559    case 's':
4560      if (GET_CODE (x) == CONST_INT)
4561	{
4562	  HOST_WIDE_INT i = INTVAL (x);
4563	  HOST_WIDE_INT s = exact_log2 (i & -i);
4564	  fprintf (file, "%d", exact_log2 (((0xffffffffUL & i) >> s) + 1));
4565	}
4566      else
4567	output_operand_lossage ("invalid operand to %%s code");
4568      return;
4569
4570    case '#' :
4571      /* Conditional branches depending on condition codes.
4572	 Note that this is only for branches that were known to depend on
4573	 condition codes before delay slot scheduling;
4574	 out-of-range brcc / bbit expansions should use '*'.
4575	 This distinction is important because of the different
4576	 allowable delay slot insns and the output of the delay suffix
4577	 for TARGET_AT_DBR_COND_EXEC.  */
4578    case '*' :
4579      /* Unconditional branches / branches not depending on condition codes.
4580	 This could also be a CALL_INSN.
4581	 Output the appropriate delay slot suffix.  */
4582      if (final_sequence && final_sequence->len () != 1)
4583	{
4584	  rtx_insn *jump = final_sequence->insn (0);
4585	  rtx_insn *delay = final_sequence->insn (1);
4586
4587	  /* For TARGET_PAD_RETURN we might have grabbed the delay insn.  */
4588	  if (delay->deleted ())
4589	    return;
4590	  if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
4591	    fputs (INSN_FROM_TARGET_P (delay) ? ".d"
4592		   : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
4593		   : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
4594		   : ".nd",
4595		   file);
4596	  else
4597	    fputs (".d", file);
4598	}
4599      return;
4600    case '?' : /* with leading "." */
4601    case '!' : /* without leading "." */
4602      /* This insn can be conditionally executed.  See if the ccfsm machinery
4603	 says it should be conditionalized.
4604	 If it shouldn't, we'll check the compact attribute if this insn
4605	 has a short variant, which may be used depending on code size and
4606	 alignment considerations.  */
4607      if (current_insn_predicate)
4608	arc_ccfsm_current.cc
4609	  = get_arc_condition_code (current_insn_predicate);
4610      if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
4611	{
4612	  /* Is this insn in a delay slot sequence?  */
4613	  if (!final_sequence || XVECLEN (final_sequence, 0) < 2
4614	      || current_insn_predicate
4615	      || CALL_P (final_sequence->insn (0))
4616	      || simplejump_p (final_sequence->insn (0)))
4617	    {
4618	      /* This insn isn't in a delay slot sequence, or conditionalized
4619		 independently of its position in a delay slot.  */
4620	      fprintf (file, "%s%s",
4621		       code == '?' ? "." : "",
4622		       arc_condition_codes[arc_ccfsm_current.cc]);
4623	      /* If this is a jump, there are still short variants.  However,
4624		 only beq_s / bne_s have the same offset range as b_s,
4625		 and the only short conditional returns are jeq_s and jne_s.  */
4626	      if (code == '!'
4627		  && (arc_ccfsm_current.cc == ARC_CC_EQ
4628		      || arc_ccfsm_current.cc == ARC_CC_NE
4629		      || 0 /* FIXME: check if branch in 7 bit range.  */))
4630		output_short_suffix (file);
4631	    }
4632	  else if (code == '!') /* Jump with delay slot.  */
4633	    fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
4634	  else /* An Instruction in a delay slot of a jump or call.  */
4635	    {
4636	      rtx jump = XVECEXP (final_sequence, 0, 0);
4637	      rtx insn = XVECEXP (final_sequence, 0, 1);
4638
4639	      /* If the insn is annulled and is from the target path, we need
4640		 to inverse the condition test.  */
4641	      if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
4642		{
4643		  if (INSN_FROM_TARGET_P (insn))
4644		    fprintf (file, "%s%s",
4645			     code == '?' ? "." : "",
4646			     arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
4647		  else
4648		    fprintf (file, "%s%s",
4649			     code == '?' ? "." : "",
4650			     arc_condition_codes[arc_ccfsm_current.cc]);
4651		  if (arc_ccfsm_current.state == 5)
4652		    arc_ccfsm_current.state = 0;
4653		}
4654	      else
4655		/* This insn is executed for either path, so don't
4656		   conditionalize it at all.  */
4657		output_short_suffix (file);
4658
4659	    }
4660	}
4661      else
4662	output_short_suffix (file);
4663      return;
    case '`':
4665      /* FIXME: fold constant inside unary operator, re-recognize, and emit.  */
4666      gcc_unreachable ();
4667    case 'd' :
4668      fputs (arc_condition_codes[get_arc_condition_code (x)], file);
4669      return;
4670    case 'D' :
4671      fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
4672				 (get_arc_condition_code (x))],
4673	     file);
4674      return;
4675    case 'R' :
4676      /* Write second word of DImode or DFmode reference,
4677	 register or memory.  */
4678      if (GET_CODE (x) == REG)
4679	fputs (reg_names[REGNO (x)+1], file);
4680      else if (GET_CODE (x) == MEM)
4681	{
4682	  fputc ('[', file);
4683
4684	  /* Handle possible auto-increment.  For PRE_INC / PRE_DEC /
4685	    PRE_MODIFY, we will have handled the first word already;
4686	    For POST_INC / POST_DEC / POST_MODIFY, the access to the
4687	    first word will be done later.  In either case, the access
4688	    to the first word will do the modify, and we only have
4689	    to add an offset of four here.  */
4690	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
4691	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
4692	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
4693	      || GET_CODE (XEXP (x, 0)) == POST_INC
4694	      || GET_CODE (XEXP (x, 0)) == POST_DEC
4695	      || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
4696	    output_address (VOIDmode,
4697			    plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
4698	  else if (output_scaled)
4699	    {
4700	      rtx addr = XEXP (x, 0);
4701	      int size = GET_MODE_SIZE (GET_MODE (x));
4702
4703	      output_address (VOIDmode,
4704			      plus_constant (Pmode, XEXP (addr, 0),
4705					     ((INTVAL (XEXP (addr, 1)) + 4)
4706					      >> (size == 2 ? 1 : 2))));
4707	      output_scaled = 0;
4708	    }
4709	  else
4710	    output_address (VOIDmode,
4711			    plus_constant (Pmode, XEXP (x, 0), 4));
4712	  fputc (']', file);
4713	}
4714      else
4715	output_operand_lossage ("invalid operand to %%R code");
4716      return;
4717    case 'j':
4718    case 'S' :
4719      if (GET_CODE (x) == SYMBOL_REF
4720	  && arc_is_jli_call_p (x))
4721	{
4722	  if (SYMBOL_REF_DECL (x))
4723	    {
4724	      tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
4725			    ? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
4726			    : NULL_TREE);
4727	      if (lookup_attribute ("jli_fixed", attrs))
4728		{
4729		  /* No special treatment for jli_fixed functions.  */
4730		  if (code == 'j')
4731		    break;
4732		  fprintf (file, HOST_WIDE_INT_PRINT_DEC "\t; @",
4733			   TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
4734		  assemble_name (file, XSTR (x, 0));
4735		  return;
4736		}
4737	    }
4738	  fprintf (file, "@__jli.");
4739	  assemble_name (file, XSTR (x, 0));
4740	  if (code == 'j')
4741	    arc_add_jli_section (x);
4742	  return;
4743	}
4744      if (GET_CODE (x) == SYMBOL_REF
4745	  && arc_is_secure_call_p (x))
4746	{
4747	  /* No special treatment for secure functions.  */
4748	  if (code == 'j' )
4749	    break;
4750	  tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
4751			? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
4752			: NULL_TREE);
4753	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "\t; @",
4754		   TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
4755	  assemble_name (file, XSTR (x, 0));
4756	  return;
4757	}
4758      break;
4759    case 'B' /* Branch or other LIMM ref - must not use sda references.  */ :
4760      if (CONSTANT_P (x))
4761	{
4762	  output_addr_const (file, x);
4763	  return;
4764	}
4765      break;
4766    case 'H' :
4767    case 'L' :
4768      if (GET_CODE (x) == REG)
4769	{
4770	  /* L = least significant word, H = most significant word.  */
4771	  if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
4772	    fputs (reg_names[REGNO (x)], file);
4773	  else
4774	    fputs (reg_names[REGNO (x)+1], file);
4775	}
4776      else if (GET_CODE (x) == CONST_INT
4777	       || GET_CODE (x) == CONST_DOUBLE)
4778	{
4779	  rtx first, second, word;
4780
4781	  split_double (x, &first, &second);
4782
	  if ((WORDS_BIG_ENDIAN) == 0)
4784	    word = (code == 'L' ? first : second);
4785	  else
4786	    word = (code == 'L' ? second : first);
4787
4788	  fprintf (file, "0x%08" PRIx32, ((uint32_t) INTVAL (word)));
4789	}
4790      else
4791	output_operand_lossage ("invalid operand to %%H/%%L code");
4792      return;
4793    case 'A' :
4794      {
4795	char str[30];
4796
4797	gcc_assert (GET_CODE (x) == CONST_DOUBLE
4798		    && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
4799
4800	real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
4801	fprintf (file, "%s", str);
4802	return;
4803      }
4804    case 'U' :
4805      /* Output a load/store with update indicator if appropriate.  */
4806      if (GET_CODE (x) == MEM)
4807	{
4808	  rtx addr = XEXP (x, 0);
4809	  switch (GET_CODE (addr))
4810	    {
4811	    case PRE_INC: case PRE_DEC: case PRE_MODIFY:
4812	      fputs (".a", file); break;
4813	    case POST_INC: case POST_DEC: case POST_MODIFY:
4814	      fputs (".ab", file); break;
4815	    case PLUS:
4816	      /* Are we using a scaled index?  */
4817	      if (GET_CODE (XEXP (addr, 0)) == MULT)
4818		fputs (".as", file);
4819	      /* Can we use a scaled offset?  */
4820	      else if (CONST_INT_P (XEXP (addr, 1))
4821		       && GET_MODE_SIZE (GET_MODE (x)) > 1
4822		       && (!(INTVAL (XEXP (addr, 1))
4823			     & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
4824		       /* Does it make a difference?  */
4825		       && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)),
4826					   GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
4827		{
4828		  fputs (".as", file);
4829		  output_scaled = 1;
4830		}
4831	      break;
4832	    case SYMBOL_REF:
4833	    case CONST:
4834	      if (legitimate_small_data_address_p (addr, GET_MODE (x))
4835		  && GET_MODE_SIZE (GET_MODE (x)) > 1)
4836		{
4837		  int align = get_symbol_alignment (addr);
4838		  int mask = 0;
4839		  switch (GET_MODE (x))
4840		    {
4841		    case E_HImode:
4842		      mask = 1;
4843		      break;
4844		    default:
4845		      mask = 3;
4846		      break;
4847		    }
4848		  if (align && ((align & mask) == 0))
4849		    fputs (".as", file);
4850		}
4851	      break;
4852	    case REG:
4853	      break;
4854	    default:
4855	      gcc_assert (CONSTANT_P (addr)); break;
4856	    }
4857	}
4858      else
4859	output_operand_lossage ("invalid operand to %%U code");
4860      return;
4861    case 'V' :
4862      /* Output cache bypass indicator for a load/store insn.  Volatile memory
4863	 refs are defined to use the cache bypass mechanism.  */
4864      if (GET_CODE (x) == MEM)
4865	{
4866	  if ((MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET)
4867	      || arc_is_uncached_mem_p (x))
4868	    fputs (".di", file);
4869	}
4870      else
4871	output_operand_lossage ("invalid operand to %%V code");
4872      return;
4873      /* plt code.  */
4874    case 'P':
4875    case 0 :
4876      /* Do nothing special.  */
4877      break;
4878    case 'F':
4879      fputs (reg_names[REGNO (x)]+1, file);
4880      return;
4881    case '^':
4882	/* This punctuation character is needed because label references are
4883	printed in the output template using %l. This is a front end
4884	character, and when we want to emit a '@' before it, we have to use
4885	this '^'.  */
4886
	fputc ('@', file);
4888	return;
4889    case 'O':
4890      /* Output an operator.  */
4891      switch (GET_CODE (x))
4892	{
4893	case PLUS:	fputs ("add", file); return;
4894	case SS_PLUS:	fputs ("adds", file); return;
4895	case AND:	fputs ("and", file); return;
4896	case IOR:	fputs ("or", file); return;
4897	case XOR:	fputs ("xor", file); return;
4898	case MINUS:	fputs ("sub", file); return;
4899	case SS_MINUS:	fputs ("subs", file); return;
4900	case ASHIFT:	fputs ("asl", file); return;
4901	case ASHIFTRT:	fputs ("asr", file); return;
4902	case LSHIFTRT:	fputs ("lsr", file); return;
4903	case ROTATERT:	fputs ("ror", file); return;
4904	case MULT:	fputs ("mpy", file); return;
4905	case ABS:	fputs ("abs", file); return; /* Unconditional.  */
4906	case NEG:	fputs ("neg", file); return;
4907	case SS_NEG:	fputs ("negs", file); return;
4908	case NOT:	fputs ("not", file); return; /* Unconditional.  */
4909	case ZERO_EXTEND:
4910	  fputs ("ext", file); /* bmsk allows predication.  */
4911	  goto size_suffix;
4912	case SIGN_EXTEND: /* Unconditional.  */
4913	  fputs ("sex", file);
4914	size_suffix:
4915	  switch (GET_MODE (XEXP (x, 0)))
4916	    {
4917	    case E_QImode: fputs ("b", file); return;
4918	    case E_HImode: fputs ("w", file); return;
4919	    default: break;
4920	    }
4921	  break;
4922	case SS_TRUNCATE:
4923	  if (GET_MODE (x) != HImode)
4924	    break;
4925	  fputs ("sat16", file);
4926	default: break;
4927	}
4928      output_operand_lossage ("invalid operand to %%O code"); return;
4929    case 'o':
4930      if (GET_CODE (x) == SYMBOL_REF)
4931	{
4932	  assemble_name (file, XSTR (x, 0));
4933	  return;
4934	}
4935      break;
4936    case '&':
4937      if (TARGET_ANNOTATE_ALIGN)
4938	fprintf (file, "; unalign: %d", cfun->machine->unalign);
4939      return;
4940    case '+':
4941      if (TARGET_V2)
4942	fputs ("m", file);
4943      else
4944	fputs ("h", file);
4945      return;
4946    case '_':
4947      if (TARGET_V2)
4948	fputs ("h", file);
4949      else
4950	fputs ("w", file);
4951      return;
4952    default :
4953      /* Unknown flag.  */
4954      output_operand_lossage ("invalid operand output code");
4955    }
4956
4957  switch (GET_CODE (x))
4958    {
4959    case REG :
4960      fputs (reg_names[REGNO (x)], file);
4961      break;
4962    case MEM :
4963      {
4964	rtx addr = XEXP (x, 0);
4965	int size = GET_MODE_SIZE (GET_MODE (x));
4966
4967	if (legitimate_small_data_address_p (addr, GET_MODE (x)))
4968	  output_sdata = 1;
4969
4970	fputc ('[', file);
4971
4972	switch (GET_CODE (addr))
4973	  {
4974	  case PRE_INC: case POST_INC:
4975	    output_address (VOIDmode,
4976			    plus_constant (Pmode, XEXP (addr, 0), size)); break;
4977	  case PRE_DEC: case POST_DEC:
4978	    output_address (VOIDmode,
4979			    plus_constant (Pmode, XEXP (addr, 0), -size));
4980	    break;
4981	  case PRE_MODIFY: case POST_MODIFY:
4982	    output_address (VOIDmode, XEXP (addr, 1)); break;
4983	  case PLUS:
4984	    if (output_scaled)
4985	      {
4986		output_address (VOIDmode,
4987				plus_constant (Pmode, XEXP (addr, 0),
4988					       (INTVAL (XEXP (addr, 1))
4989						>> (size == 2 ? 1 : 2))));
4990		output_scaled = 0;
4991	      }
4992	    else
4993	      output_address (VOIDmode, addr);
4994	    break;
4995	  default:
4996	    if (flag_pic && CONSTANT_ADDRESS_P (addr))
4997	      arc_output_pic_addr_const (file, addr, code);
4998	    else
4999	      output_address (VOIDmode, addr);
5000	    break;
5001	  }
5002	fputc (']', file);
5003	break;
5004      }
5005    case CONST_DOUBLE :
5006      /* We handle SFmode constants here as output_addr_const doesn't.  */
5007      if (GET_MODE (x) == SFmode)
5008	{
5009	  long l;
5010
5011	  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
5012	  fprintf (file, "0x%08lx", l);
5013	  break;
5014	}
5015      /* FALLTHRU */
5016      /* Let output_addr_const deal with it.  */
5017    default :
5018      if (flag_pic
5019	  || (GET_CODE (x) == CONST
5020	      && GET_CODE (XEXP (x, 0)) == UNSPEC
5021	      && (XINT (XEXP (x, 0), 1) == UNSPEC_TLS_OFF
5022		  || XINT (XEXP (x, 0), 1) == UNSPEC_TLS_GD))
5023	  || (GET_CODE (x) == CONST
5024	      && GET_CODE (XEXP (x, 0)) == PLUS
5025	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
5026	      && (XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_OFF
5027		  || XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_GD)))
5028	arc_output_pic_addr_const (file, x, code);
5029      else
5030	output_addr_const (file, x);
5031      break;
5032    }
5033}
5034
5035/* Print a memory address as an operand to reference that memory location.  */
5036
5037void
arc_print_operand_address (FILE *file, rtx addr)
5039{
5040  rtx base, index = 0;
5041
5042  switch (GET_CODE (addr))
5043    {
5044    case REG :
5045      fputs (reg_names[REGNO (addr)], file);
5046      break;
5047    case SYMBOL_REF:
5048      if (output_sdata)
5049	fputs ("gp,", file);
5050      output_addr_const (file, addr);
5051      if (output_sdata)
5052	fputs ("@sda", file);
5053      output_sdata = 0;
5054      break;
5055    case PLUS :
5056      if (GET_CODE (XEXP (addr, 0)) == MULT)
5057	index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
5058      else if (CONST_INT_P (XEXP (addr, 0)))
5059	index = XEXP (addr, 0), base = XEXP (addr, 1);
5060      else
5061	base = XEXP (addr, 0), index = XEXP (addr, 1);
5062
5063      gcc_assert (OBJECT_P (base));
5064      arc_print_operand_address (file, base);
5065      if (CONSTANT_P (base) && CONST_INT_P (index))
5066	fputc ('+', file);
5067      else
5068	fputc (',', file);
5069      gcc_assert (OBJECT_P (index));
5070      arc_print_operand_address (file, index);
5071      break;
5072    case CONST:
5073      {
5074	rtx c = XEXP (addr, 0);
5075
5076	if ((GET_CODE (c) == UNSPEC
5077	     && (XINT (c, 1) == UNSPEC_TLS_OFF
5078		 || XINT (c, 1) == UNSPEC_TLS_IE))
5079	    || (GET_CODE (c) == PLUS
5080		&& GET_CODE (XEXP (c, 0)) == UNSPEC
5081		&& (XINT (XEXP (c, 0), 1) == UNSPEC_TLS_OFF
5082		    || XINT (XEXP (c, 0), 1) == ARC_UNSPEC_GOTOFFPC)))
5083	  {
5084	    arc_output_pic_addr_const (file, c, 0);
5085	    break;
5086	  }
5087	gcc_assert (GET_CODE (c) == PLUS);
5088	gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
5089	gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
5090
5091	output_address (VOIDmode, XEXP (addr, 0));
5092
5093	break;
5094      }
5095    case PRE_INC :
5096    case PRE_DEC :
      /* We shouldn't get here as we've lost the mode of the memory object
	 (which says how much to inc/dec by).  */
5099      gcc_unreachable ();
5100      break;
5101    default :
5102      if (flag_pic)
5103	arc_output_pic_addr_const (file, addr, 0);
5104      else
5105	output_addr_const (file, addr);
5106      break;
5107    }
5108}
5109
5110/* Conditional execution support.
5111
5112   This is based on the ARM port but for now is much simpler.
5113
5114   A finite state machine takes care of noticing whether or not instructions
5115   can be conditionally executed, and thus decrease execution time and code
5116   size by deleting branch instructions.  The fsm is controlled by
5117   arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
5118   actions of PRINT_OPERAND.  The patterns in the .md file for the branch
5119   insns also have a hand in this.  */
/* The way we leave dealing with non-annulled or annul-false delay slot
   insns to the consumer is awkward.  */
5122
5123/* The states of the fsm controlling condition codes are:
5124   0: normal, do nothing special
5125   1: don't output this insn
5126   2: don't output this insn
5127   3: make insns conditional
5128   4: make insns conditional
5129   5: make insn conditional (only for outputting annulled delay slot insns)
5130
5131   special value for cfun->machine->uid_ccfsm_state:
5132   6: return with but one insn before it since function start / call
5133
5134   State transitions (state->state by whom, under what condition):
5135   0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
5136          some instructions.
5137   0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
5138          by zero or more non-jump insns and an unconditional branch with
5139	  the same target label as the condbranch.
5140   1 -> 3 branch patterns, after having not output the conditional branch
5141   2 -> 4 branch patterns, after having not output the conditional branch
5142   0 -> 5 branch patterns, for annulled delay slot insn.
5143   3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
5144          (the target label has CODE_LABEL_NUMBER equal to
5145	  arc_ccfsm_target_label).
5146   4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
5147   3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
5148   5 -> 0 when outputting the delay slot insn
5149
5150   If the jump clobbers the conditions then we use states 2 and 4.
5151
5152   A similar thing can be done with conditional return insns.
5153
5154   We also handle separating branches from sets of the condition code.
5155   This is done here because knowledge of the ccfsm state is required;
5156   we may not be outputting the branch.  */
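/* Illustrative sketch only (not verbatim compiler output): for a branch
   skipping a single insn, e.g.

	cmp_s	r1,r2
	beq	@.L1
	mov_s	r0,1
   .L1:

   arc_ccfsm_advance moves from state 0 to 1, the branch pattern then
   suppresses the beq and moves to state 3, and the skipped insn is output
   predicated on the inverse condition, roughly `mov.ne r0,1'; reaching
   .L1 resets the state to 0.  The exact mnemonics and operands above are
   hypothetical.  */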
5157
5158/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
5159   before letting final output INSN.  */
5160
5161static void
5162arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
5163{
5164  /* BODY will hold the body of INSN.  */
5165  rtx body;
5166
5167  /* This will be 1 if trying to repeat the trick (ie: do the `else' part of
5168     an if/then/else), and things need to be reversed.  */
5169  int reverse = 0;
5170
5171  /* If we start with a return insn, we only succeed if we find another one.  */
5172  int seeking_return = 0;
5173
5174  /* START_INSN will hold the insn from where we start looking.  This is the
5175     first insn after the following code_label if REVERSE is true.  */
5176  rtx_insn *start_insn = insn;
5177
5178  /* Type of the jump_insn.  Brcc insns don't affect ccfsm changes,
5179     since they don't rely on a cmp preceding them.  */
5180  enum attr_type jump_insn_type;
5181
5182  /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
5183     We can't do this in macro FINAL_PRESCAN_INSN because it's called from
5184     final_scan_insn which has `optimize' as a local.  */
5185  if (optimize < 2 || TARGET_NO_COND_EXEC)
5186    return;
5187
5188  /* Ignore notes and labels.  */
5189  if (!INSN_P (insn))
5190    return;
5191  body = PATTERN (insn);
5192  /* If in state 4, check if the target branch is reached, in order to
5193     change back to state 0.  */
5194  if (state->state == 4)
5195    {
5196      if (insn == state->target_insn)
5197	{
5198	  state->target_insn = NULL;
5199	  state->state = 0;
5200	}
5201      return;
5202    }
5203
5204  /* If in state 3, it is possible to repeat the trick, if this insn is an
5205     unconditional branch to a label, and immediately following this branch
5206     is the previous target label which is only used once, and the label this
5207     branch jumps to is not too far off.  Or in other words "we've done the
5208     `then' part, see if we can do the `else' part."  */
5209  if (state->state == 3)
5210    {
5211      if (simplejump_p (insn))
5212	{
5213	  start_insn = next_nonnote_insn (start_insn);
5214	  if (GET_CODE (start_insn) == BARRIER)
5215	    {
5216	      /* ??? Isn't this always a barrier?  */
5217	      start_insn = next_nonnote_insn (start_insn);
5218	    }
5219	  if (GET_CODE (start_insn) == CODE_LABEL
5220	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
5221	      && LABEL_NUSES (start_insn) == 1)
5222	    reverse = TRUE;
5223	  else
5224	    return;
5225	}
5226      else if (GET_CODE (body) == SIMPLE_RETURN)
5227	{
5228	  start_insn = next_nonnote_insn (start_insn);
5229	  if (GET_CODE (start_insn) == BARRIER)
5230	    start_insn = next_nonnote_insn (start_insn);
5231	  if (GET_CODE (start_insn) == CODE_LABEL
5232	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
5233	      && LABEL_NUSES (start_insn) == 1)
5234	    {
5235	      reverse = TRUE;
5236	      seeking_return = 1;
5237	    }
5238	  else
5239	    return;
5240	}
5241      else
5242	return;
5243    }
5244
5245  if (GET_CODE (insn) != JUMP_INSN
5246      || GET_CODE (PATTERN (insn)) == ADDR_VEC
5247      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
5248    return;
5249
5250 /* We can't predicate BRCC or loop ends.
5251    Also, when generating PIC code, and considering a medium range call,
5252    we can't predicate the call.  */
5253  jump_insn_type = get_attr_type (insn);
5254  if (jump_insn_type == TYPE_BRCC
5255      || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
5256      || jump_insn_type == TYPE_LOOP_END
5257      || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
5258    return;
5259
5260  /* This jump might be paralleled with a clobber of the condition codes;
5261     the jump should always come first.  */
5262  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5263    body = XVECEXP (body, 0, 0);
5264
5265  if (reverse
5266      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
5267	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
5268    {
5269      int insns_skipped = 0, fail = FALSE, succeed = FALSE;
5270      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
5271      int then_not_else = TRUE;
5272      /* Nonzero if next insn must be the target label.  */
5273      int next_must_be_target_label_p;
5274      rtx_insn *this_insn = start_insn;
5275      rtx label = 0;
5276
5277      /* Register the insn jumped to.  */
5278      if (reverse)
5279	{
5280	  if (!seeking_return)
5281	    label = XEXP (SET_SRC (body), 0);
5282	}
5283      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
5284	label = XEXP (XEXP (SET_SRC (body), 1), 0);
5285      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
5286	{
5287	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
5288	  then_not_else = FALSE;
5289	}
5290      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
5291	seeking_return = 1;
5292      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
5293	{
5294	  seeking_return = 1;
5295	  then_not_else = FALSE;
5296	}
5297      else
5298	gcc_unreachable ();
5299
5300      /* If this is a non-annulled branch with a delay slot, there is
5301	 no need to conditionalize the delay slot.  */
5302      if ((GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) == SEQUENCE)
5303	  && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
5304	{
5305	  this_insn = NEXT_INSN (this_insn);
5306	}
5307      /* See how many insns this branch skips, and what kind of insns.  If all
5308	 insns are okay, and the label or unconditional branch to the same
5309	 label is not too far away, succeed.  */
5310      for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
5311	   !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
5312	   insns_skipped++)
5313	{
5314	  rtx scanbody;
5315
5316	  this_insn = next_nonnote_insn (this_insn);
5317	  if (!this_insn)
5318	    break;
5319
5320	  if (next_must_be_target_label_p)
5321	    {
5322	      if (GET_CODE (this_insn) == BARRIER)
5323		continue;
5324	      if (GET_CODE (this_insn) == CODE_LABEL
5325		  && this_insn == label)
5326		{
5327		  state->state = 1;
5328		  succeed = TRUE;
5329		}
5330	      else
5331		fail = TRUE;
5332	      break;
5333	    }
5334
5335	  switch (GET_CODE (this_insn))
5336	    {
5337	    case CODE_LABEL:
5338	      /* Succeed if it is the target label, otherwise fail since
5339		 control falls in from somewhere else.  */
5340	      if (this_insn == label)
5341		{
5342		  state->state = 1;
5343		  succeed = TRUE;
5344		}
5345	      else
5346		fail = TRUE;
5347	      break;
5348
5349	    case BARRIER:
5350	      /* Succeed if the following insn is the target label.
5351		 Otherwise fail.
5352		 If return insns are used then the last insn in a function
5353		 will be a barrier.  */
5354	      next_must_be_target_label_p = TRUE;
5355	      break;
5356
5357	    case CALL_INSN:
5358	      /* Can handle a call insn if there are no insns after it.
5359		 IE: The next "insn" is the target label.  We don't have to
5360		 worry about delay slots as such insns are SEQUENCE's inside
5361		 INSN's.  ??? It is possible to handle such insns though.  */
5362	      if (get_attr_cond (this_insn) == COND_CANUSE)
5363		next_must_be_target_label_p = TRUE;
5364	      else
5365		fail = TRUE;
5366	      break;
5367
5368	    case JUMP_INSN:
5369	      scanbody = PATTERN (this_insn);
5370
5371	      /* If this is an unconditional branch to the same label, succeed.
5372		 If it is to another label, do nothing.  If it is conditional,
5373		 fail.  */
5374	      /* ??? Probably, the test for the SET and the PC are
5375		 unnecessary.  */
5376
5377	      if (GET_CODE (scanbody) == SET
5378		  && GET_CODE (SET_DEST (scanbody)) == PC)
5379		{
5380		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
5381		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
5382		    {
5383		      state->state = 2;
5384		      succeed = TRUE;
5385		    }
5386		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
5387		    fail = TRUE;
5388		  else if (get_attr_cond (this_insn) != COND_CANUSE)
5389		    fail = TRUE;
5390		}
5391	      else if (GET_CODE (scanbody) == SIMPLE_RETURN
5392		       && seeking_return)
5393		{
5394		  state->state = 2;
5395		  succeed = TRUE;
5396		}
5397	      else if (GET_CODE (scanbody) == PARALLEL)
5398		{
5399		  if (get_attr_cond (this_insn) != COND_CANUSE)
5400		    fail = TRUE;
5401		}
5402	      break;
5403
5404	    case INSN:
5405	      scanbody = PATTERN (this_insn);
5406
5407	      /* We can only do this with insns that can use the condition
5408		 codes (and don't set them).  */
5409	      if (GET_CODE (scanbody) == SET
5410		  || GET_CODE (scanbody) == PARALLEL)
5411		{
5412		  if (get_attr_cond (this_insn) != COND_CANUSE)
5413		    fail = TRUE;
5414		}
5415	      /* We can't handle other insns like sequences.  */
5416	      else
5417		fail = TRUE;
5418	      break;
5419
5420	    default:
5421	      break;
5422	    }
5423	}
5424
5425      if (succeed)
5426	{
5427	  if ((!seeking_return) && (state->state == 1 || reverse))
5428	    state->target_label = CODE_LABEL_NUMBER (label);
5429	  else if (seeking_return || state->state == 2)
5430	    {
5431	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
5432		{
5433		  this_insn = next_nonnote_insn (this_insn);
5434
5435		  gcc_assert (!this_insn ||
5436			      (GET_CODE (this_insn) != BARRIER
5437			       && GET_CODE (this_insn) != CODE_LABEL));
5438		}
5439	      if (!this_insn)
5440		{
5441		  /* Oh dear! we ran off the end, give up.  */
5442		  extract_insn_cached (insn);
5443		  state->state = 0;
5444		  state->target_insn = NULL;
5445		  return;
5446		}
5447	      state->target_insn = this_insn;
5448	    }
5449	  else
5450	    gcc_unreachable ();
5451
5452	  /* If REVERSE is true, the condition code in STATE->cc needs to be
5453	     inverted from what it was.  */
5454	  if (!reverse)
5455	    {
5456	      state->cond = XEXP (SET_SRC (body), 0);
5457	      state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
5458	    }
5459
5460	  if (reverse || then_not_else)
5461	    state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
5462	}
5463
5464      /* Restore recog_operand.  Getting the attributes of other insns can
5465	 destroy this array, but final.cc assumes that it remains intact
5466	 across this call; since the insn has been recognized already we
5467	 call insn_extract direct.  */
5468      extract_insn_cached (insn);
5469    }
5470}
5471
5472/* Record that we are currently outputting label NUM with prefix PREFIX.
5473   If it's the label we're looking for, reset the ccfsm machinery.
5474
5475   Called from ASM_OUTPUT_INTERNAL_LABEL.  */
5476
5477static void
5478arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
5479{
5480  if (state->state == 3 && state->target_label == num
5481      && !strcmp (prefix, "L"))
5482    {
5483      state->state = 0;
5484      state->target_insn = NULL;
5485    }
5486}
5487
5488/* We are considering a conditional branch with the condition COND.
5489   Check if we want to conditionalize a delay slot insn, and if so modify
5490   the ccfsm state accordingly.
5491   REVERSE says branch will branch when the condition is false.  */
5492void
5493arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
5494			    struct arc_ccfsm *state)
5495{
5496  rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
5497  if (!state)
5498    state = &arc_ccfsm_current;
5499
5500  gcc_assert (state->state == 0);
5501  if (seq_insn != jump)
5502    {
5503      rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
5504
5505      if (!as_a<rtx_insn *> (insn)->deleted ()
5506	  && INSN_ANNULLED_BRANCH_P (jump)
5507	  && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
5508	{
5509	  state->cond = cond;
5510	  state->cc = get_arc_condition_code (cond);
5511	  if (!reverse)
5512	    arc_ccfsm_current.cc
5513	      = ARC_INVERSE_CONDITION_CODE (state->cc);
5514	  rtx pat = PATTERN (insn);
5515	  if (GET_CODE (pat) == COND_EXEC)
5516	    gcc_assert ((INSN_FROM_TARGET_P (insn)
5517			 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
5518			== get_arc_condition_code (XEXP (pat, 0)));
5519	  else
5520	    state->state = 5;
5521	}
5522    }
5523}
5524
5525/* Update *STATE as we would when we emit INSN.  */
5526
5527static void
5528arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
5529{
5530  enum attr_type type;
5531
5532  if (LABEL_P (insn))
5533    arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
5534  else if (JUMP_P (insn)
5535	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
5536	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
5537	   && ((type = get_attr_type (insn)) == TYPE_BRANCH
5538	       || ((type == TYPE_UNCOND_BRANCH
5539		    || type == TYPE_RETURN)
5540		   && ARC_CCFSM_BRANCH_DELETED_P (state))))
5541    {
5542      if (ARC_CCFSM_BRANCH_DELETED_P (state))
5543	ARC_CCFSM_RECORD_BRANCH_DELETED (state);
5544      else
5545	{
5546	  rtx src = SET_SRC (PATTERN (insn));
5547	  arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
5548				      insn, state);
5549	}
5550    }
5551  else if (arc_ccfsm_current.state == 5)
5552    arc_ccfsm_current.state = 0;
5553}
5554
5555/* Return true if the current insn, which is a conditional branch, is to be
5556   deleted.  */
5557
5558bool
5559arc_ccfsm_branch_deleted_p (void)
5560{
5561  return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
5562}
5563
5564/* Record that a branch isn't output because subsequent insns can be
5565   conditionalized.  */
5566
5567void
5568arc_ccfsm_record_branch_deleted (void)
5569{
5570  ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
5571}
5572
5573/* During insn output, indicate if the current insn is predicated.  */
5574
5575bool
5576arc_ccfsm_cond_exec_p (void)
5577{
5578  return (cfun->machine->prescan_initialized
5579	  && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
5580}
5581
5582/* When deciding if an insn should be output short, we want to know something
5583   about the following insns:
5584   - if another insn follows which we know we can output as a short insn
5585     before an alignment-sensitive point, we can output this insn short:
5586     the decision about the eventual alignment can be postponed.
5587   - if a to-be-aligned label comes next, we should output this insn such
5588     as to get / preserve 4-byte alignment.
5589   - if a likely branch without delay slot insn, or a call with an immediately
5590     following short insn comes next, we should output this insn such as to
5591     get / preserve 2 mod 4 unalignment.
5592   - do the same for a not completely unlikely branch with a short insn
5593     following before any other branch / label.
5594   - in order to decide if we are actually looking at a branch, we need to
5595     call arc_ccfsm_advance.
5596   - in order to decide if we are looking at a short insn, we should know
5597     if it is conditionalized.  To a first order of approximation this is
5598     the case if the state from arc_ccfsm_advance from before this insn
5599     indicates the insn is conditionalized.  However, a further refinement
5600     could be to not conditionalize an insn if the destination register(s)
5601     is/are dead in the non-executed case.  */
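/* For instance (a sketch of the arithmetic only): starting 4-byte aligned
   (cfun->machine->unalign == 0), emitting one 2-byte short insn leaves the
   insn stream 2 mod 4 unaligned (unalign == 2); emitting a second short
   insn restores 4-byte alignment, which is what the `unalign ^= 2' toggle
   in output_short_suffix below keeps track of.  */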
5602/* Return non-zero if INSN should be output as a short insn.  UNALIGN is
5603   zero if the current insn is aligned to a 4-byte boundary, two otherwise.
5604   If CHECK_ATTR is greater than 0, check the iscompact attribute first.  */
5605
5606static int
5607arc_verify_short (rtx_insn *insn, int, int check_attr)
5608{
5609  enum attr_iscompact iscompact;
5610
5611  if (check_attr > 0)
5612    {
5613      iscompact = get_attr_iscompact (insn);
5614      if (iscompact == ISCOMPACT_FALSE)
5615	return 0;
5616    }
5617
5618  return (get_attr_length (insn) & 2) != 0;
5619}
5620
5621/* When outputting an instruction (alternative) that can potentially be short,
5622   output the short suffix if the insn is in fact short, and update
5623   cfun->machine->unalign accordingly.  */
5624
5625static void
5626output_short_suffix (FILE *file)
5627{
5628  rtx_insn *insn = current_output_insn;
5629  if (!insn)
5630    return;
5631
5632  if (arc_verify_short (insn, cfun->machine->unalign, 1))
5633    {
5634      fprintf (file, "_s");
5635      cfun->machine->unalign ^= 2;
5636    }
5637  /* Restore recog_operand.  */
5638  extract_insn_cached (insn);
5639}
5640
5641/* Implement FINAL_PRESCAN_INSN.  */
5642
5643void
5644arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
5645			int noperands ATTRIBUTE_UNUSED)
5646{
5647  if (TARGET_DUMPISIZE)
5648    fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5649
5650  if (!cfun->machine->prescan_initialized)
5651    {
5652      /* Clear lingering state from branch shortening.  */
5653      memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
5654      cfun->machine->prescan_initialized = 1;
5655    }
5656  arc_ccfsm_advance (insn, &arc_ccfsm_current);
5657}
5658
5659/* Given FROM and TO register numbers, say whether this elimination is allowed.
5660   Frame pointer elimination is automatically handled.
5661
5662   All eliminations are permissible. If we need a frame
5663   pointer, we must eliminate ARG_POINTER_REGNUM into
5664   FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
5665
5666static bool
5667arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
5668{
5669  return ((to == HARD_FRAME_POINTER_REGNUM) || (to == STACK_POINTER_REGNUM));
5670}
5671
5672/* Define the offset between two registers, one to be eliminated, and
5673   the other its replacement, at the start of a routine.  */
5674
5675int
5676arc_initial_elimination_offset (int from, int to)
5677{
5678  if (!cfun->machine->frame_info.initialized)
5679    arc_compute_frame_size ();
5680
5681  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5682    {
5683      return (cfun->machine->frame_info.extra_size
5684	      + cfun->machine->frame_info.reg_size);
5685    }
5686
5687  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5688    {
5689      return (cfun->machine->frame_info.total_size
5690	      - cfun->machine->frame_info.pretend_size);
5691    }
5692
5693  if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
5694    {
5695      return (cfun->machine->frame_info.total_size
5696	      - (cfun->machine->frame_info.pretend_size
5697	      + cfun->machine->frame_info.extra_size
5698	      + cfun->machine->frame_info.reg_size));
5699    }
5700  if ((from == FRAME_POINTER_REGNUM) && (to == HARD_FRAME_POINTER_REGNUM))
5701    return 0;
5702
5703  gcc_unreachable ();
5704}
5705
5706static bool
5707arc_frame_pointer_required (void)
5708{
5709  return cfun->calls_alloca || crtl->calls_eh_return;
5710}
5711
5712
5713/* Return the destination address of a branch.  */
5714
5715static int
5716branch_dest (rtx branch)
5717{
5718  rtx pat = PATTERN (branch);
5719  rtx dest = (GET_CODE (pat) == PARALLEL
5720	      ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
5721  int dest_uid;
5722
5723  if (GET_CODE (dest) == IF_THEN_ELSE)
5724    dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
5725
5726  dest = XEXP (dest, 0);
5727  dest_uid = INSN_UID (dest);
5728
5729  return INSN_ADDRESSES (dest_uid);
5730}
5731
5732
5733/* Implement TARGET_ENCODE_SECTION_INFO hook.  */
5734
5735static void
5736arc_encode_section_info (tree decl, rtx rtl, int first)
5737{
5738  /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
5739     This clears machine specific flags, so has to come first.  */
5740  default_encode_section_info (decl, rtl, first);
5741
5742  /* Check if it is a function, and whether it has the
5743     [long/medium/short]_call attribute specified.  */
5744  if (TREE_CODE (decl) == FUNCTION_DECL)
5745    {
5746      rtx symbol = XEXP (rtl, 0);
5747      int flags = SYMBOL_REF_FLAGS (symbol);
5748
5749      tree attr = (TREE_TYPE (decl) != error_mark_node
5750		   ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
5751      tree long_call_attr = lookup_attribute ("long_call", attr);
5752      tree medium_call_attr = lookup_attribute ("medium_call", attr);
5753      tree short_call_attr = lookup_attribute ("short_call", attr);
5754
5755      if (long_call_attr != NULL_TREE)
5756	flags |= SYMBOL_FLAG_LONG_CALL;
5757      else if (medium_call_attr != NULL_TREE)
5758	flags |= SYMBOL_FLAG_MEDIUM_CALL;
5759      else if (short_call_attr != NULL_TREE)
5760	flags |= SYMBOL_FLAG_SHORT_CALL;
5761
5762      SYMBOL_REF_FLAGS (symbol) = flags;
5763    }
5764  else if (TREE_CODE (decl) == VAR_DECL)
5765    {
5766      rtx symbol = XEXP (rtl, 0);
5767
5768      tree attr = (TREE_TYPE (decl) != error_mark_node
5769		   ? DECL_ATTRIBUTES (decl) : NULL_TREE);
5770
5771      tree sec_attr = lookup_attribute ("section", attr);
5772      if (sec_attr)
5773	{
5774	  const char *sec_name
5775	    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (sec_attr)));
5776	  if (strcmp (sec_name, ".cmem") == 0
5777	      || strcmp (sec_name, ".cmem_shared") == 0
5778	      || strcmp (sec_name, ".cmem_private") == 0)
5779	    SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_CMEM;
5780	}
5781    }
5782}
5783
5784/* This is how to output a definition of an internal numbered label where
5785   PREFIX is the class of label and NUM is the number within the class.  */
5786
5787static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
5788{
5789  if (cfun)
5790    arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
5791  default_internal_label (stream, prefix, labelno);
5792}
5793
5794/* Set the cpu type and print out other fancy things,
5795   at the top of the file.  */
5796
5797static void arc_file_start (void)
5798{
5799  default_file_start ();
5800  fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
5801
5802  /* Set some build attributes that we want to have.  */
5803  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_PCS_config, %d\n",
5804	       ATTRIBUTE_PCS);
5805  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_rf16, %d\n",
5806	       TARGET_RF16 ? 1 : 0);
5807  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_pic, %d\n",
5808	       flag_pic ? 2 : 0);
5809  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_tls, %d\n",
5810	       (arc_tp_regno != -1) ? 1 : 0);
5811  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_sda, %d\n",
5812	       TARGET_NO_SDATA_SET ? 0 : 2);
5813  asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_exceptions, %d\n",
5814	       TARGET_OPTFPE ? 1 : 0);
5815  if (TARGET_V2)
5816    asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_CPU_variation, %d\n",
5817		 (arc_tune < ARC_TUNE_CORE_3) ? 2 :
5818		 (arc_tune == ARC_TUNE_CORE_3 ? 3 : 4));
5819}
5820
5821/* Implement `TARGET_ASM_FILE_END'.  */
5822/* Output JLI-related text to the assembler output file.  */
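/* For a hypothetical function `foo' recorded in arc_jli_sections, the loop
   below emits roughly:

	# JLI entry for function foo
		.section .jlitab, "axG", @progbits, .jlitab.foo, comdat
		.align	4
	__jli.foo:
		.weak __jli.foo
		b	@foo

   (a sketch derived from the fprintf calls; the name `foo' is made up).  */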
5823
5824void arc_file_end (void)
5825{
5826  arc_jli_section *sec = arc_jli_sections;
5827
5828  while (sec != NULL)
5829    {
5830      fprintf (asm_out_file, "\n");
5831      fprintf (asm_out_file, "# JLI entry for function ");
5832      assemble_name (asm_out_file, sec->name);
5833      fprintf (asm_out_file, "\n\t.section .jlitab, \"axG\", @progbits, "
5834	       ".jlitab.");
5835      assemble_name (asm_out_file, sec->name);
5836      fprintf (asm_out_file,", comdat\n");
5837
5838      fprintf (asm_out_file, "\t.align\t4\n");
5839      fprintf (asm_out_file, "__jli.");
5840      assemble_name (asm_out_file, sec->name);
5841      fprintf (asm_out_file, ":\n\t.weak __jli.");
5842      assemble_name (asm_out_file, sec->name);
5843      fprintf (asm_out_file, "\n\tb\t@");
5844      assemble_name (asm_out_file, sec->name);
5845      fprintf (asm_out_file, "\n");
5846      sec = sec->next;
5847    }
5848  file_end_indicate_exec_stack ();
5849}
5850
5851/* Cost functions.  */
5852
5853/* Compute a (partial) cost for rtx X.  Return true if the complete
5854   cost has been computed, and false if subexpressions should be
5855   scanned.  In either case, *TOTAL contains the cost result.  */
5856
5857static bool
5858arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
5859	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
5860{
5861  int code = GET_CODE (x);
5862
5863  switch (code)
5864    {
5865      /* Small integers are as cheap as registers.  */
5866    case CONST_INT:
5867      {
5868	bool nolimm = false; /* Can we do without long immediate?  */
5869
5871	if (UNSIGNED_INT6 (INTVAL (x)))
5872	  nolimm = true;
5873	else
5874	  {
5875	    switch (outer_code)
5876	      {
5877	      case AND: /* bclr, bmsk, ext[bw] */
5878		if (satisfies_constraint_Ccp (x) /* bclr */
5879		    || satisfies_constraint_C1p (x) /* bmsk */)
5880		  nolimm = true;
5881		break;
5882	      case IOR: /* bset */
5883		if (satisfies_constraint_C0p (x)) /* bset */
5884		  nolimm = true;
5885		break;
5886	      case XOR:
5887		if (satisfies_constraint_C0p (x)) /* bxor */
5888		  nolimm = true;
5889		break;
5890	      case SET:
5891		if (UNSIGNED_INT8 (INTVAL (x)))
5892		  nolimm = true;
5893		if (satisfies_constraint_Chi (x))
5894		  nolimm = true;
5895		if (satisfies_constraint_Clo (x))
5896		  nolimm = true;
5897		break;
5898	      case MULT:
5899		if (TARGET_MUL64_SET)
5900		  if (SIGNED_INT12 (INTVAL (x)))
5901		    nolimm = true;
5902		break;
5903	      default:
5904		break;
5905	      }
5906	  }
5907	if (nolimm)
5908	  {
5909	    *total = 0;
5910	    return true;
5911	  }
5912      }
5913      /* FALLTHRU */
5914
5915      /*  4 byte values can be fetched as immediate constants -
5916	  let's give that the cost of an extra insn.  */
5917    case CONST:
5918    case LABEL_REF:
5919    case SYMBOL_REF:
5920      *total = speed ? COSTS_N_INSNS (1) : COSTS_N_INSNS (4);
5921      return true;
5922
5923    case CONST_DOUBLE:
5924      {
5925	rtx first, second;
5926
5927	if (TARGET_DPFP)
5928	  {
5929	    *total = COSTS_N_INSNS (1);
5930	    return true;
5931	  }
5932	split_double (x, &first, &second);
5933	*total = COSTS_N_INSNS (!SMALL_INT (INTVAL (first))
5934				+ !SMALL_INT (INTVAL (second)));
5935	return true;
5936      }
5937
5938    /* Encourage synth_mult to find a synthetic multiply when reasonable.
5939       If we need more than 12 insns to do a multiply, then go out-of-line,
5940       since the call overhead will be < 10% of the cost of the multiply.  */
5941    case ASHIFT:
5942    case ASHIFTRT:
5943    case LSHIFTRT:
5944      if (TARGET_BARREL_SHIFTER)
5945	{
5946	  if (CONSTANT_P (XEXP (x, 0)))
5947	    {
5948	      *total += rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code,
5949				  0, speed);
5950	      return true;
5951	    }
5952	  *total = COSTS_N_INSNS (1);
5953	}
5954      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5955	*total = COSTS_N_INSNS (16);
5956      else
5957	{
5958	  *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
5959	  /* ??? want_to_gcse_p can throw negative shift counts at us,
5960	     and then panics when it gets a negative cost as result.
5961	     Seen for gcc.c-torture/compile/20020710-1.c -Os .  */
5962	  if (*total < 0)
5963	    *total = 0;
5964	}
5965      return false;
5966
5967    case DIV:
5968    case UDIV:
5969      if (GET_MODE_CLASS (mode) == MODE_FLOAT
5970	  && (TARGET_FP_SP_SQRT || TARGET_FP_DP_SQRT))
5971	*total = COSTS_N_INSNS (1);
5972      else if (GET_MODE_CLASS (mode) == MODE_INT
5973	       && TARGET_DIVREM)
5974	*total = COSTS_N_INSNS (1);
5975      else if (speed)
5976	*total = COSTS_N_INSNS (30);
5977      else
5978	*total = COSTS_N_INSNS (1);
5979      return false;
5980
5981    case MULT:
5982      if ((TARGET_DPFP && GET_MODE (x) == DFmode))
5983	*total = COSTS_N_INSNS (1);
5984      else if (speed)
5985	*total = arc_multcost;
5986      /* We do not want synth_mult sequences when optimizing
5987	 for size.  */
5988      else if (TARGET_ANY_MPY)
5989	*total = COSTS_N_INSNS (1);
5990      else
5991	*total = COSTS_N_INSNS (2);
5992      return false;
5993
5994    case PLUS:
5995      if (outer_code == MEM && CONST_INT_P (XEXP (x, 1))
5996	  && RTX_OK_FOR_OFFSET_P (mode, XEXP (x, 1)))
5997	{
5998	  *total = 0;
5999	  return true;
6000	}
6001
6002      if ((GET_CODE (XEXP (x, 0)) == ASHIFT
6003	   && _1_2_3_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
6004          || (GET_CODE (XEXP (x, 0)) == MULT
6005              && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)))
6006	{
6007	  if (CONSTANT_P (XEXP (x, 1)) && !speed)
6008	    *total += COSTS_N_INSNS (4);
6009	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, PLUS, 1, speed);
6010	  return true;
6011	}
6012      return false;
6013    case MINUS:
6014      if ((GET_CODE (XEXP (x, 1)) == ASHIFT
6015	   && _1_2_3_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
6016          || (GET_CODE (XEXP (x, 1)) == MULT
6017              && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)))
6018	{
6019	  if (CONSTANT_P (XEXP (x, 0)) && !speed)
6020	    *total += COSTS_N_INSNS (4);
6021	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, PLUS, 1, speed);
6022	  return true;
6023	}
6024      return false;
6025
6026    case COMPARE:
6027      {
6028	rtx op0 = XEXP (x, 0);
6029	rtx op1 = XEXP (x, 1);
6030
6031	if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
6032	    && XEXP (op0, 1) == const1_rtx)
6033	  {
6034	    /* btst / bbit0 / bbit1:
6035	       Small integers and registers are free; everything else can
6036	       be put in a register.  */
6037	    mode = GET_MODE (XEXP (op0, 0));
6038	    *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
6039		      + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
6040	    return true;
6041	  }
6042	if (GET_CODE (op0) == AND && op1 == const0_rtx
6043	    && satisfies_constraint_C1p (XEXP (op0, 1)))
6044	  {
6045	    /* bmsk.f */
6046	    *total = rtx_cost (XEXP (op0, 0), VOIDmode, SET, 1, speed);
6047	    return true;
6048	  }
6049	/* add.f  */
6050	if (GET_CODE (op1) == NEG)
6051	  {
6052	    /* op0 might be constant, the inside of op1 is rather
6053	       unlikely to be so.  So swapping the operands might lower
6054	       the cost.  */
6055	    mode = GET_MODE (op0);
6056	    *total = (rtx_cost (op0, mode, PLUS, 1, speed)
6057		      + rtx_cost (XEXP (op1, 0), mode, PLUS, 0, speed));
6058	  }
6059	return false;
6060      }
6061    case EQ: case NE:
6062      if (outer_code == IF_THEN_ELSE
6063	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
6064	  && XEXP (x, 1) == const0_rtx
6065	  && XEXP (XEXP (x, 0), 1) == const1_rtx)
6066	{
6067	  /* btst / bbit0 / bbit1:
6068	     Small integers and registers are free; everything else can
6069	     be put in a register.  */
6070	  rtx op0 = XEXP (x, 0);
6071
6072	  mode = GET_MODE (XEXP (op0, 0));
6073	  *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
6074		    + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
6075	  return true;
6076	}
6077      /* Fall through.  */
6078    /* scc_insn expands into two insns.  */
6079    case GTU: case GEU: case LEU:
6080      if (mode == SImode)
6081	*total += COSTS_N_INSNS (1);
6082      return false;
6083    case LTU: /* might use adc.  */
6084      if (mode == SImode)
6085	*total += COSTS_N_INSNS (1) - 1;
6086      return false;
6087    default:
6088      return false;
6089    }
6090}
6091
6092/* Return true if ADDR is a valid pic address.
6093   A valid pic address on arc should look like
6094   const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT))  */
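/* Schematically, an offsetted variant that this predicate also accepts
   would look roughly like
     (const (plus (unspec [(symbol_ref "x")] ARC_UNSPEC_GOTOFF)
		  (const_int 4)))
   where the particular unspec kind and the symbol name are only
   illustrative.  */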
6095
6096bool
6097arc_legitimate_pic_addr_p (rtx addr)
6098{
6099  if (GET_CODE (addr) != CONST)
6100    return false;
6101
6102  addr = XEXP (addr, 0);
6103
6104
6105  if (GET_CODE (addr) == PLUS)
6106    {
6107      if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
6108	return false;
6109      addr = XEXP (addr, 0);
6110    }
6111
6112  if (GET_CODE (addr) != UNSPEC
6113      || XVECLEN (addr, 0) != 1)
6114    return false;
6115
6116  /* Must be one of @GOT, @GOTOFF, @GOTOFFPC, @tlsgd, @tlsie.  */
6117  if (XINT (addr, 1) != ARC_UNSPEC_GOT
6118      && XINT (addr, 1) != ARC_UNSPEC_GOTOFF
6119      && XINT (addr, 1) != ARC_UNSPEC_GOTOFFPC
6120      && XINT (addr, 1) != UNSPEC_TLS_GD
6121      && XINT (addr, 1) != UNSPEC_TLS_IE)
6122    return false;
6123
6124  if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
6125      && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
6126    return false;
6127
6128  return true;
6129}
6130
6133/* Return true if OP contains a symbol reference.  */
6134
6135static bool
6136symbolic_reference_mentioned_p (rtx op)
6137{
6138  const char *fmt;
6139  int i;
6140
6141  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
6142    return true;
6143
6144  fmt = GET_RTX_FORMAT (GET_CODE (op));
6145  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6146    {
6147      if (fmt[i] == 'E')
6148	{
6149	  int j;
6150
6151	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6152	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6153	      return true;
6154	}
6155
6156      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6157	return true;
6158    }
6159
6160  return false;
6161}
6162
6163/* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
6164   If SKIP_LOCAL is true, skip symbols that bind locally.
6165   This is used further down in this file, and, without SKIP_LOCAL,
6166   in the addsi3 / subsi3 expanders when generating PIC code.  */
6167
6168bool
6169arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
6170{
6171  const char *fmt;
6172  int i;
6173
6174  if (GET_CODE (op) == UNSPEC
6175    return false;
6176
6177  if (GET_CODE (op) == SYMBOL_REF)
6178    {
6179      if (SYMBOL_REF_TLS_MODEL (op))
6180	return true;
6181      if (!flag_pic)
6182	return false;
6183      tree decl = SYMBOL_REF_DECL (op);
6184      return !skip_local || !decl || !default_binds_local_p (decl);
6185    }
6186
6187  fmt = GET_RTX_FORMAT (GET_CODE (op));
6188  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6189    {
6190      if (fmt[i] == 'E')
6191	{
6192	  int j;
6193
6194	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6195	    if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
6196							skip_local))
6197	      return true;
6198	}
6199
6200      else if (fmt[i] == 'e'
6201	       && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
6202							  skip_local))
6203	return true;
6204    }
6205
6206  return false;
6207}
6208
6209/* The __tls_get_addr symbol.  */
6210static GTY(()) rtx arc_tls_symbol;
6211
6212/* Emit a call to __tls_get_addr.  TI is the argument to this function.
6213   The call is emitted into the current sequence, and the register
6214   holding its result is returned.  */
6215
6216static rtx
6217arc_call_tls_get_addr (rtx ti)
6218{
6219  rtx arg = gen_rtx_REG (Pmode, R0_REG);
6220  rtx ret = gen_rtx_REG (Pmode, R0_REG);
6221  rtx fn;
6222  rtx_insn *insn;
6223
6224  if (!arc_tls_symbol)
6225    arc_tls_symbol = init_one_libfunc ("__tls_get_addr");
6226
6227  emit_move_insn (arg, ti);
6228  fn = gen_rtx_MEM (SImode, arc_tls_symbol);
6229  insn = emit_call_insn (gen_call_value (ret, fn, const0_rtx));
6230  RTL_CONST_CALL_P (insn) = 1;
6231  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), ret);
6232  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), arg);
6233
6234  return ret;
6235}
6236
6237#define DTPOFF_ZERO_SYM ".tdata"
6238
6239/* Return a legitimized address for ADDR,
6240   which is a SYMBOL_REF with tls_model MODEL.  */
6241
6242static rtx
6243arc_legitimize_tls_address (rtx addr, enum tls_model model)
6244{
6245  rtx tmp;
6246
6247  if (!flag_pic && model == TLS_MODEL_LOCAL_DYNAMIC)
6248    model = TLS_MODEL_LOCAL_EXEC;
6249
6250
6251  /* The TP pointer needs to be set.  */
6252  gcc_assert (arc_tp_regno != -1);
6253
6254  switch (model)
6255    {
6256    case TLS_MODEL_GLOBAL_DYNAMIC:
6257      tmp = gen_reg_rtx (Pmode);
6258      emit_move_insn (tmp, arc_unspec_offset (addr, UNSPEC_TLS_GD));
6259      return arc_call_tls_get_addr (tmp);
6260
6261    case TLS_MODEL_LOCAL_DYNAMIC:
6262      rtx base;
6263      tree decl;
6264      const char *base_name;
6265
6266      decl = SYMBOL_REF_DECL (addr);
6267      base_name = DTPOFF_ZERO_SYM;
6268      if (decl && bss_initializer_p (decl))
6269	base_name = ".tbss";
6270
6271      base = gen_rtx_SYMBOL_REF (Pmode, base_name);
6272      tmp = gen_reg_rtx (Pmode);
6273      emit_move_insn (tmp, arc_unspec_offset (base, UNSPEC_TLS_GD));
6274      base = arc_call_tls_get_addr (tmp);
6275      return gen_rtx_PLUS (Pmode, force_reg (Pmode, base),
6276			   arc_unspec_offset (addr, UNSPEC_TLS_OFF));
6277
6278    case TLS_MODEL_INITIAL_EXEC:
6279      addr = arc_unspec_offset (addr, UNSPEC_TLS_IE);
6280      addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr));
6281      return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, arc_tp_regno), addr);
6282
6283    case TLS_MODEL_LOCAL_EXEC:
6284      addr = arc_unspec_offset (addr, UNSPEC_TLS_OFF);
6285      return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, arc_tp_regno), addr);
6286
6287    default:
6288      gcc_unreachable ();
6289    }
6290}
6291
6292/* Return true if SYMBOL_REF X binds locally.  */
6293
6294static bool
6295arc_symbol_binds_local_p (const_rtx x)
6296{
6297  return (SYMBOL_REF_DECL (x)
6298	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6299	  : SYMBOL_REF_LOCAL_P (x));
6300}
6301
6302/* Legitimize a pic address reference in ADDR.  The return value is
6303   the legitimated address.  */
6304
6305static rtx
6306arc_legitimize_pic_address (rtx addr)
6307{
6308  if (!flag_pic)
6309    return addr;
6310
6311  switch (GET_CODE (addr))
6312    {
6313    case UNSPEC:
6314      /* Can be one of our GOT or GOTOFFPC unspecs.  This situation
6315	 happens when an address is not a legitimate constant and we
6316	 need to resolve it via force_reg in
6317	 prepare_move_operands.  */
6318      switch (XINT (addr, 1))
6319	{
6320	case ARC_UNSPEC_GOT:
6321	case ARC_UNSPEC_GOTOFFPC:
6322	  /* Recover the symbol ref.  */
6323	  addr = XVECEXP (addr, 0, 0);
6324	  break;
6325	default:
6326	  return addr;
6327	}
6328      /* Fall through.  */
6329    case SYMBOL_REF:
6330      /* TLS symbols are handled in a different place.  */
6331      if (SYMBOL_REF_TLS_MODEL (addr))
6332	return addr;
6333
6334      /* This symbol must be referenced via a load from the Global
6335	 Offset Table (@GOTPC).  */
6336      if (!arc_symbol_binds_local_p (addr))
6337	return gen_const_mem (Pmode, arc_unspec_offset (addr, ARC_UNSPEC_GOT));
6338
6339      /* Local symbol: use @pcl to access it.  */
6340      /* Fall through.  */
6341    case LABEL_REF:
6342      return arc_unspec_offset (addr, ARC_UNSPEC_GOTOFFPC);
6343
6344    default:
6345      break;
6346    }
6347
6348  return addr;
6349}
6350
6351/* Output address constant X to FILE, taking PIC into account.  */
6352
6353static void
6354arc_output_pic_addr_const (FILE * file, rtx x, int code)
6355{
6356  char buf[256];
6357
6358 restart:
6359  switch (GET_CODE (x))
6360    {
6361    case PC:
6362      if (flag_pic)
6363	putc ('.', file);
6364      else
6365	gcc_unreachable ();
6366      break;
6367
6368    case SYMBOL_REF:
6369      output_addr_const (file, x);
6370
6371      /* Local functions do not get references through the PLT.  */
6372      if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6373	fputs ("@plt", file);
6374      break;
6375
6376    case LABEL_REF:
6377      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
6378      assemble_name (file, buf);
6379      break;
6380
6381    case CODE_LABEL:
6382      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6383      assemble_name (file, buf);
6384      break;
6385
6386    case CONST_INT:
6387      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6388      break;
6389
6390    case CONST:
6391      arc_output_pic_addr_const (file, XEXP (x, 0), code);
6392      break;
6393
6394    case CONST_DOUBLE:
6395      if (GET_MODE (x) == VOIDmode)
6396	{
6397	  /* We can use %d if the number is one word and positive.  */
6398	  if (CONST_DOUBLE_HIGH (x))
6399	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
6400		     CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
6401	  else if  (CONST_DOUBLE_LOW (x) < 0)
6402	    fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
6403	  else
6404	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6405	}
6406      else
6407	/* We can't handle floating point constants;
6408	   PRINT_OPERAND must handle them.  */
6409	output_operand_lossage ("floating constant misused");
6410      break;
6411
6412    case PLUS:
6413      /* FIXME: Not needed here.  */
6414      /* Some assemblers need integer constants to appear last (eg masm).  */
6415      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6416	{
6417	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
6418	  fprintf (file, "+");
6419	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
6420	}
6421      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6422	{
6423	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
6424	  if (INTVAL (XEXP (x, 1)) >= 0)
6425	    fprintf (file, "+");
6426	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
6427	}
6428      else
6429	gcc_unreachable();
6430      break;
6431
6432    case MINUS:
6433      /* Avoid outputting things like x-x or x+5-x,
6434	 since some assemblers can't handle that.  */
6435      x = simplify_subtraction (x);
6436      if (GET_CODE (x) != MINUS)
6437	goto restart;
6438
6439      arc_output_pic_addr_const (file, XEXP (x, 0), code);
6440      fprintf (file, "-");
6441      if (GET_CODE (XEXP (x, 1)) == CONST_INT
6442	  && INTVAL (XEXP (x, 1)) < 0)
6443	{
6444	  fprintf (file, "(");
6445	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
6446	  fprintf (file, ")");
6447	}
6448      else
6449	arc_output_pic_addr_const (file, XEXP (x, 1), code);
6450      break;
6451
6452    case ZERO_EXTEND:
6453    case SIGN_EXTEND:
6454      arc_output_pic_addr_const (file, XEXP (x, 0), code);
6455      break;
6456
6457
6458    case UNSPEC:
6459      const char *suffix;
6460      bool pcrel; pcrel = false;
6461      rtx base; base = NULL;
6462      gcc_assert (XVECLEN (x, 0) >= 1);
6463      switch (XINT (x, 1))
6464	{
6465	case ARC_UNSPEC_GOT:
6466	  suffix = "@gotpc", pcrel = true;
6467	  break;
6468	case ARC_UNSPEC_GOTOFF:
6469	  suffix = "@gotoff";
6470	  break;
6471	case ARC_UNSPEC_GOTOFFPC:
6472	  suffix = "@pcl",   pcrel = true;
6473	  break;
6474	case ARC_UNSPEC_PLT:
6475	  suffix = "@plt";
6476	  break;
6477	case UNSPEC_TLS_GD:
6478	  suffix = "@tlsgd", pcrel = true;
6479	  break;
6480	case UNSPEC_TLS_IE:
6481	  suffix = "@tlsie", pcrel = true;
6482	  break;
6483	case UNSPEC_TLS_OFF:
6484	  if (XVECLEN (x, 0) == 2)
6485	    base = XVECEXP (x, 0, 1);
6486	  if (SYMBOL_REF_TLS_MODEL (XVECEXP (x, 0, 0)) == TLS_MODEL_LOCAL_EXEC
6487	      || (!flag_pic && !base))
6488	    suffix = "@tpoff";
6489	  else
6490	    suffix = "@dtpoff";
6491	  break;
6492	default:
6493	  suffix = "@invalid";
6494	  output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1));
6495	  break;
6496	}
6497      if (pcrel)
6498	fputs ("pcl,", file);
6499      arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6500      fputs (suffix, file);
6501      if (base)
6502	arc_output_pic_addr_const (file, base, code);
6503      break;
6504
6505    default:
6506      output_operand_lossage ("invalid expression as operand");
6507    }
6508}
6509
6510/* This function returns the number of words, at the beginning of an
6511   argument, that must be put in registers.  The returned value must be
6512   zero for arguments that are passed entirely in registers or that
6513   are entirely pushed on the stack.
6514
6515   On some machines, certain arguments must be passed partially in
6516   registers and partially in memory.  On these machines, typically
6517   the first N words of arguments are passed in registers, and the
6518   rest on the stack.  If a multi-word argument (a `double' or a
6519   structure) crosses that boundary, its first few words must be
6520   passed in registers and the rest must be pushed.  This function
6521   tells the compiler when this occurs, and how many of the words
6522   should go in registers.
6523
6524   `FUNCTION_ARG' for these arguments should return the first register
6525   to be used by the caller for this argument; likewise
6526   `FUNCTION_INCOMING_ARG', for the called function.
6527
6528   The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS.  */
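/* Worked example (a sketch, assuming MAX_ARC_PARM_REGS is 8, i.e. args in
   r0-r7): a 16-byte argument (4 words) arriving when only r6 and r7 are
   still free gets 2 registers; arc_arg_partial_bytes below then returns
   2 * UNITS_PER_WORD = 8, so the first 8 bytes go in r6-r7 and the
   remaining 8 bytes are pushed on the stack.  */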
6529
6530/* If REGNO is the lowest arg reg still available, return the number of
6531   arg regs that remain available.  */
6532#define GPR_REST_ARG_REGS(REGNO) \
6533  ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
6534
6535/* ARC parm regs are contiguous, so the next arg reg is simply REGNO + 1.  */
6536#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
6537
6538/* Implement TARGET_ARG_PARTIAL_BYTES.  */
6539
6540static int
6541arc_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
6542{
6543  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6544  int bytes = arg.promoted_size_in_bytes ();
6545  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6546  int arg_num = *cum;
6547  int ret;
6548
6549  arg_num = ROUND_ADVANCE_CUM (arg_num, arg.mode, arg.type);
6550  ret = GPR_REST_ARG_REGS (arg_num);
6551
6552  /* ICEd at function.cc:2361, and ret is copied to data->partial.  */
6553  ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
6554
6555  return ret;
6556}
6557
6558/* Implement TARGET_FUNCTION_ARG.  On the ARC the first MAX_ARC_PARM_REGS
6559   args are normally in registers and the rest are pushed.  */
6560
6561static rtx
6562arc_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
6563{
6564  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6565  int arg_num = *cum;
6566  rtx ret;
6567  const char *debstr ATTRIBUTE_UNUSED;
6568
6569  arg_num = ROUND_ADVANCE_CUM (arg_num, arg.mode, arg.type);
6570  /* Return a marker for use in the call instruction.  */
6571  if (arg.end_marker_p ())
6572    {
6573      ret = const0_rtx;
6574      debstr = "<0>";
6575    }
6576  else if (GPR_REST_ARG_REGS (arg_num) > 0)
6577    {
6578      ret = gen_rtx_REG (arg.mode, arg_num);
6579      debstr = reg_names [arg_num];
6580    }
6581  else
6582    {
6583      ret = NULL_RTX;
6584      debstr = "memory";
6585    }
6586  return ret;
6587}
6588
6589/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */
6590/* For the ARC: the cum set here is passed on to function_arg, where we
6591   look at its value and say which reg to use.  Strategy: advance the
6592   reg number here till we run out of arg regs, then set *cum past the
6593   last reg.  In function_arg, since *cum > last arg reg, we would
6594   return 0 and thus the arg will end up on the stack.  For straddling
6595   args, function_arg_partial_nregs will of course come into play.  */
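/* A rough example of the bookkeeping (assuming 4-byte registers, i.e.
   UNITS_PER_WORD == 4): a 4-byte int advances *cum by one reg, while an
   8-byte long long advances it by two, after ROUND_ADVANCE_CUM has first
   applied any alignment rounding to *cum.  */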
6596
6597static void
6598arc_function_arg_advance (cumulative_args_t cum_v,
6599			  const function_arg_info &arg)
6600{
6601  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6602  int bytes = arg.promoted_size_in_bytes ();
6603  int words = (bytes + UNITS_PER_WORD  - 1) / UNITS_PER_WORD;
6604  int i;
6605
6606  if (words)
6607    *cum = ROUND_ADVANCE_CUM (*cum, arg.mode, arg.type);
6608  for (i = 0; i < words; i++)
6609    *cum = ARC_NEXT_ARG_REG (*cum);
6611}
6612
6613/* Define how to find the value returned by a function.
6614   VALTYPE is the data type of the value (as a tree).
6615   If the precise function being called is known, FN_DECL_OR_TYPE is its
6616   FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type.  */
6617
6618static rtx
6619arc_function_value (const_tree valtype,
6620		    const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6621		    bool outgoing ATTRIBUTE_UNUSED)
6622{
6623  machine_mode mode = TYPE_MODE (valtype);
6624  int unsignedp ATTRIBUTE_UNUSED;
6625
6626  unsignedp = TYPE_UNSIGNED (valtype);
6627  if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
6628    PROMOTE_MODE (mode, unsignedp, valtype);
6629  return gen_rtx_REG (mode, 0);
6630}
6631
6632/* Returns the return address that is used by builtin_return_address.  */
6633
6634rtx
6635arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
6636{
6637  if (count != 0)
6638    return const0_rtx;
6639
6640  return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6641}
6642
6643/* Determine if a given RTX is a valid constant.  We already know this
6644   satisfies CONSTANT_P.  */
6645
6646bool
6647arc_legitimate_constant_p (machine_mode mode, rtx x)
6648{
6649  switch (GET_CODE (x))
6650    {
6651    case CONST:
6652      if (flag_pic)
6653	{
6654	  if (arc_legitimate_pic_addr_p (x))
6655	    return true;
6656	}
6657      return arc_legitimate_constant_p (mode, XEXP (x, 0));
6658
6659    case SYMBOL_REF:
6660      if (SYMBOL_REF_TLS_MODEL (x))
6661	return false;
6662      /* Fall through.  */
6663    case LABEL_REF:
6664      if (flag_pic)
6665	return false;
6666      /* Fall through.  */
6667    case CONST_INT:
6668    case CONST_DOUBLE:
6669      return true;
6670
6671    case NEG:
6672      return arc_legitimate_constant_p (mode, XEXP (x, 0));
6673
6674    case PLUS:
6675    case MINUS:
6676      {
6677	bool t1 = arc_legitimate_constant_p (mode, XEXP (x, 0));
6678	bool t2 = arc_legitimate_constant_p (mode, XEXP (x, 1));
6679
6680	return (t1 && t2);
6681      }
6682
6683    case CONST_VECTOR:
6684      switch (mode)
6685	{
6686	case E_V2HImode:
6687	  return TARGET_PLUS_DMPY;
6688	case E_V2SImode:
6689	case E_V4HImode:
6690	  return TARGET_PLUS_QMACW;
6691	default:
6692	  return false;
6693	}
6694
6695    case UNSPEC:
6696      switch (XINT (x, 1))
6697	{
6698	case UNSPEC_TLS_GD:
6699	case UNSPEC_TLS_OFF:
6700	case UNSPEC_TLS_IE:
6701	  return true;
6702	default:
6703	  /* Any other unspecs ending up here are PIC related; hence the
6704	     above constant PIC address check returned false.  */
6705	  return false;
6706	}
6707      /* Fall through.  */
6708
6709    default:
6710      fatal_insn ("unrecognized supposed constant", x);
6711    }
6712
6713  gcc_unreachable ();
6714}
6715
6716static bool
6717arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
6718{
6719  if (RTX_OK_FOR_BASE_P (x, strict))
6720     return true;
6721  if (legitimate_offset_address_p (mode, x, TARGET_INDEXED_LOADS, strict))
6722     return true;
6723  if (legitimate_scaled_address_p (mode, x, strict))
6724    return true;
6725  if (legitimate_small_data_address_p (x, mode))
6726     return true;
6727  if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
6728     return true;
6729
6730  /* When we compile for size avoid const (@sym + offset)
6731     addresses.  */
6732  if (!flag_pic && optimize_size && !reload_completed
6733      && (GET_CODE (x) == CONST)
6734      && (GET_CODE (XEXP (x, 0)) == PLUS)
6735      && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
6736      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) == 0
6737      && !SYMBOL_REF_FUNCTION_P (XEXP (XEXP (x, 0), 0)))
6738    {
6739      rtx addend = XEXP (XEXP (x, 0), 1);
6740      gcc_assert (CONST_INT_P (addend));
6741      HOST_WIDE_INT offset = INTVAL (addend);
6742
6743      /* Allow addresses having a large offset to pass.  Anyhow they
6744	 will end in a limm.  */
6745      return !(offset > -1024 && offset < 1020);
6746    }
6747
6748  if ((GET_MODE_SIZE (mode) != 16) && CONSTANT_P (x))
6749    {
6750      return arc_legitimate_constant_p (mode, x);
6751    }
6752  if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
6753       || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
6754      && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
6755    return true;
6756      /* We're restricted here by the `st' insn.  */
6757  if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
6758      && GET_CODE (XEXP ((x), 1)) == PLUS
6759      && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
6760      && legitimate_offset_address_p (QImode, XEXP (x, 1),
6761				      TARGET_AUTO_MODIFY_REG, strict))
6762    return true;
6763  return false;
6764}
6765
6766/* Return true iff ADDR (a legitimate address expression)
6767   has an effect that depends on the machine mode it is used for.  */
6768
6769static bool
6770arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
6771{
6772  /* SYMBOL_REF is not mode dependent: it is either a small data reference,
6773     which is valid for loads and stores, or a limm offset, which is valid for
6774     loads.  Scaled indices are scaled by the access mode.  */
6775  if (GET_CODE (addr) == PLUS
6776      && GET_CODE (XEXP ((addr), 0)) == MULT)
6777    return true;
6778  return false;
6779}
6780
6781/* Determine if it's legal to put X into the constant pool.  */
6782
6783static bool
6784arc_cannot_force_const_mem (machine_mode mode, rtx x)
6785{
6786  return !arc_legitimate_constant_p (mode, x);
6787}
6788
6789/* IDs for all the ARC builtins.  */
6790
6791enum arc_builtin_id
6792  {
6793#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)	\
6794    ARC_BUILTIN_ ## NAME,
6795#include "builtins.def"
6796#undef DEF_BUILTIN
6797
6798    ARC_BUILTIN_COUNT
6799  };
6800
6801struct GTY(()) arc_builtin_description
6802{
6803  enum insn_code icode;
6804  int n_args;
6805  tree fndecl;
6806};
6807
6808static GTY(()) struct arc_builtin_description
6809arc_bdesc[ARC_BUILTIN_COUNT] =
6810{
6811#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)		\
6812  { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE },
6813#include "builtins.def"
6814#undef DEF_BUILTIN
6815};
6816
6817/* Transform UP into lowercase and write the result to LO.
6818   You must provide enough space for LO.  Return LO.  */
6819
6820static char*
6821arc_tolower (char *lo, const char *up)
6822{
6823  char *lo0 = lo;
6824
6825  for (; *up; up++, lo++)
6826    *lo = TOLOWER (*up);
6827
6828  *lo = '\0';
6829
6830  return lo0;
6831}
6832
6833/* Implement `TARGET_BUILTIN_DECL'.  */
6834
6835static tree
6836arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
6837{
6838  if (id < ARC_BUILTIN_COUNT)
6839    return arc_bdesc[id].fndecl;
6840
6841  return error_mark_node;
6842}
6843
6844static void
6845arc_init_builtins (void)
6846{
6847  tree V4HI_type_node;
6848  tree V2SI_type_node;
6849  tree V2HI_type_node;
6850
6851  /* Vector types based on HS SIMD elements.  */
6852  V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
6853  V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
6854  V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
6855
6856  tree pcvoid_type_node
6857    = build_pointer_type (build_qualified_type (void_type_node,
6858						TYPE_QUAL_CONST));
6859  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node,
6860						    V8HImode);
6861
6862  tree void_ftype_void
6863    = build_function_type_list (void_type_node, NULL_TREE);
6864  tree int_ftype_int
6865    = build_function_type_list (integer_type_node, integer_type_node,
6866				NULL_TREE);
6867  tree int_ftype_pcvoid_int
6868    = build_function_type_list (integer_type_node, pcvoid_type_node,
6869				integer_type_node, NULL_TREE);
6870  tree void_ftype_usint_usint
6871    = build_function_type_list (void_type_node, long_unsigned_type_node,
6872				long_unsigned_type_node, NULL_TREE);
6873  tree int_ftype_int_int
6874    = build_function_type_list (integer_type_node, integer_type_node,
6875				integer_type_node, NULL_TREE);
6876  tree usint_ftype_usint
6877    = build_function_type_list  (long_unsigned_type_node,
6878				 long_unsigned_type_node, NULL_TREE);
6879  tree void_ftype_usint
6880    = build_function_type_list (void_type_node, long_unsigned_type_node,
6881				NULL_TREE);
6882  tree int_ftype_void
6883    = build_function_type_list (integer_type_node, void_type_node,
6884				NULL_TREE);
6885  tree void_ftype_int
6886    = build_function_type_list (void_type_node, integer_type_node,
6887				NULL_TREE);
6888  tree int_ftype_short
6889    = build_function_type_list (integer_type_node, short_integer_type_node,
6890				NULL_TREE);
6891
6892  /* Old ARC SIMD types.  */
6893  tree v8hi_ftype_v8hi_v8hi
6894    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6895				V8HI_type_node, NULL_TREE);
6896  tree v8hi_ftype_v8hi_int
6897    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6898				integer_type_node, NULL_TREE);
6899  tree v8hi_ftype_v8hi_int_int
6900    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6901				integer_type_node, integer_type_node,
6902				NULL_TREE);
6903  tree void_ftype_v8hi_int_int
6904    = build_function_type_list (void_type_node, V8HI_type_node,
6905				integer_type_node, integer_type_node,
6906				NULL_TREE);
6907  tree void_ftype_v8hi_int_int_int
6908    = build_function_type_list (void_type_node, V8HI_type_node,
6909				integer_type_node, integer_type_node,
6910				integer_type_node, NULL_TREE);
6911  tree v8hi_ftype_int_int
6912    = build_function_type_list (V8HI_type_node, integer_type_node,
6913				integer_type_node, NULL_TREE);
6914  tree void_ftype_int_int
6915    = build_function_type_list (void_type_node, integer_type_node,
6916				integer_type_node, NULL_TREE);
6917  tree v8hi_ftype_v8hi
6918    = build_function_type_list (V8HI_type_node, V8HI_type_node,
6919				NULL_TREE);
6920  /* ARCv2 SIMD types.  */
6921  tree long_ftype_v4hi_v4hi
6922    = build_function_type_list (long_long_integer_type_node,
6923				V4HI_type_node,	V4HI_type_node, NULL_TREE);
6924  tree int_ftype_v2hi_v2hi
6925    = build_function_type_list (integer_type_node,
6926				V2HI_type_node, V2HI_type_node, NULL_TREE);
6927  tree v2si_ftype_v2hi_v2hi
6928    = build_function_type_list (V2SI_type_node,
6929				V2HI_type_node, V2HI_type_node, NULL_TREE);
6930  tree v2hi_ftype_v2hi_v2hi
6931    = build_function_type_list (V2HI_type_node,
6932				V2HI_type_node, V2HI_type_node, NULL_TREE);
6933  tree v2si_ftype_v2si_v2si
6934    = build_function_type_list (V2SI_type_node,
6935				V2SI_type_node, V2SI_type_node, NULL_TREE);
6936  tree v4hi_ftype_v4hi_v4hi
6937    = build_function_type_list (V4HI_type_node,
6938				V4HI_type_node, V4HI_type_node, NULL_TREE);
6939  tree long_ftype_v2si_v2hi
6940    = build_function_type_list (long_long_integer_type_node,
6941				V2SI_type_node, V2HI_type_node, NULL_TREE);
6942
6943  /* Add the builtins.  */
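  /* Each DEF_BUILTIN entry in builtins.def is registered below; e.g. the
     NOP entry becomes the user-visible __builtin_arc_nop, i.e. NAME is
     prefixed with "__builtin_arc_" and lowercased via arc_tolower.  */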
6944#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)			\
6945  {									\
6946    int id = ARC_BUILTIN_ ## NAME;					\
6947    const char *Name = "__builtin_arc_" #NAME;				\
6948    char *name = (char*) alloca (1 + strlen (Name));			\
6949									\
6950    gcc_assert (id < ARC_BUILTIN_COUNT);				\
6951    if (MASK)								\
6952      arc_bdesc[id].fndecl						\
6953	= add_builtin_function (arc_tolower(name, Name), TYPE, id,	\
6954				BUILT_IN_MD, NULL, NULL_TREE);		\
6955  }
6956#include "builtins.def"
6957#undef DEF_BUILTIN
6958}
6959
6960/* Helper to expand __builtin_arc_aligned (void* val, int
6961  alignval).  */
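/* For example, __builtin_arc_aligned (p, 4) evaluates to 1 when the
   compiler can prove that P is at least 4-byte aligned and to 0
   otherwise; a non-constant ALIGNVAL is only diagnosed (with a warning)
   when optimizing.  */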
6962
6963static rtx
6964arc_expand_builtin_aligned (tree exp)
6965{
6966  tree arg0 = CALL_EXPR_ARG (exp, 0);
6967  tree arg1 = CALL_EXPR_ARG (exp, 1);
6968  fold (arg1);
6969  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6970  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6971
6972  if (!CONST_INT_P (op1))
6973    {
6974      /* If we can't fold the alignment to a constant integer
6975	 whilst optimizing, this is probably a user error.  */
6976      if (optimize)
6977	warning (0, "%<__builtin_arc_aligned%> with non-constant alignment");
6978    }
6979  else
6980    {
6981      HOST_WIDE_INT alignTest = INTVAL (op1);
6982      /* Check alignTest is positive, and a power of two.  */
6983      if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
6984	{
6985	  error ("invalid alignment value for %<__builtin_arc_aligned%>");
6986	  return NULL_RTX;
6987	}
6988
6989      if (CONST_INT_P (op0))
6990	{
6991	  HOST_WIDE_INT pnt = INTVAL (op0);
6992
6993	  if ((pnt & (alignTest - 1)) == 0)
6994	    return const1_rtx;
6995	}
6996      else
6997	{
6998	  unsigned  align = get_pointer_alignment (arg0);
6999	  unsigned  numBits = alignTest * BITS_PER_UNIT;
7000
7001	  if (align && align >= numBits)
7002	    return const1_rtx;
7003	  /* Another attempt to ascertain alignment.  Check the type
7004	     we are pointing to.  */
7005	  if (POINTER_TYPE_P (TREE_TYPE (arg0))
7006	      && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
7007	    return const1_rtx;
7008	}
7009    }
7010
7011  /* Default to false.  */
7012  return const0_rtx;
7013}
7014
7015/* Helper for arc_expand_builtin; generates a pattern for the given icode
7016   and arguments.  */
7017
7018static rtx_insn *
7019apply_GEN_FCN (enum insn_code icode, rtx *arg)
7020{
7021  switch (insn_data[icode].n_generator_args)
7022    {
7023    case 0:
7024      return GEN_FCN (icode) ();
7025    case 1:
7026      return GEN_FCN (icode) (arg[0]);
7027    case 2:
7028      return GEN_FCN (icode) (arg[0], arg[1]);
7029    case 3:
7030      return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
7031    case 4:
7032      return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
7033    case 5:
7034      return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
7035    default:
7036      gcc_unreachable ();
7037    }
7038}
7039
7040/* Expand an expression EXP that calls a built-in function,
7041   with result going to TARGET if that's convenient
7042   (and in mode MODE if that's convenient).
7043   SUBTARGET may be used as the target for computing one of EXP's operands.
7044   IGNORE is nonzero if the value is to be ignored.  */
7045
7046static rtx
7047arc_expand_builtin (tree exp,
7048		    rtx target,
7049		    rtx subtarget ATTRIBUTE_UNUSED,
7050		    machine_mode mode ATTRIBUTE_UNUSED,
7051		    int ignore ATTRIBUTE_UNUSED)
7052{
7053  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
7054  unsigned int id = DECL_MD_FUNCTION_CODE (fndecl);
7055  const struct arc_builtin_description *d = &arc_bdesc[id];
7056  int i, j, n_args = call_expr_nargs (exp);
7057  rtx pat = NULL_RTX;
7058  rtx xop[5];
7059  enum insn_code icode = d->icode;
7060  machine_mode tmode = insn_data[icode].operand[0].mode;
7061  int nonvoid;
7062  tree arg0;
7063  tree arg1;
7064  tree arg2;
7065  tree arg3;
7066  rtx op0;
7067  rtx op1;
7068  rtx op2;
7069  rtx op3;
7070  rtx op4;
7071  machine_mode mode0;
7072  machine_mode mode1;
7073  machine_mode mode2;
7074  machine_mode mode3;
7075  machine_mode mode4;
7076
7077  if (id >= ARC_BUILTIN_COUNT)
7078    internal_error ("bad builtin fcode");
7079
7080  /* 1st part: Expand special builtins.  */
7081  switch (id)
7082    {
7083    case ARC_BUILTIN_NOP:
7084      emit_insn (gen_nopv ());
7085      return NULL_RTX;
7086
7087    case ARC_BUILTIN_RTIE:
7088    case ARC_BUILTIN_SYNC:
7089    case ARC_BUILTIN_BRK:
7090    case ARC_BUILTIN_SWI:
7091    case ARC_BUILTIN_UNIMP_S:
7092      gcc_assert (icode != 0);
7093      emit_insn (GEN_FCN (icode) (const1_rtx));
7094      return NULL_RTX;
7095
7096    case ARC_BUILTIN_ALIGNED:
7097      return arc_expand_builtin_aligned (exp);
7098
7099    case ARC_BUILTIN_CLRI:
7100      target = gen_reg_rtx (SImode);
7101      emit_insn (gen_clri (target, const1_rtx));
7102      return target;
7103
7104    case ARC_BUILTIN_TRAP_S:
7105    case ARC_BUILTIN_SLEEP:
7106      arg0 = CALL_EXPR_ARG (exp, 0);
7107      fold (arg0);
7108      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
7109
7110      gcc_assert (icode != 0);
7111      emit_insn (GEN_FCN (icode) (op0));
7112      return NULL_RTX;
7113
7114    case ARC_BUILTIN_VDORUN:
7115    case ARC_BUILTIN_VDIRUN:
7116      arg0 = CALL_EXPR_ARG (exp, 0);
7117      arg1 = CALL_EXPR_ARG (exp, 1);
7118      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7119      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7120
7121      target = gen_rtx_REG (SImode, (id == ARC_BUILTIN_VDIRUN) ? 131 : 139);
7122
7123      mode0 =  insn_data[icode].operand[1].mode;
7124      mode1 =  insn_data[icode].operand[2].mode;
7125
7126      if (!insn_data[icode].operand[1].predicate (op0, mode0))
7127	op0 = copy_to_mode_reg (mode0, op0);
7128
7129      if (!insn_data[icode].operand[2].predicate (op1, mode1))
7130	op1 = copy_to_mode_reg (mode1, op1);
7131
7132      pat = GEN_FCN (icode) (target, op0, op1);
7133      if (!pat)
7134	return NULL_RTX;
7135
7136      emit_insn (pat);
7137      return NULL_RTX;
7138
7139    case ARC_BUILTIN_VDIWR:
7140    case ARC_BUILTIN_VDOWR:
7141      arg0 = CALL_EXPR_ARG (exp, 0);
7142      arg1 = CALL_EXPR_ARG (exp, 1);
7143      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7144      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7145
7146      if (!CONST_INT_P (op0)
7147	  || !(UNSIGNED_INT3 (INTVAL (op0))))
7148	error ("operand 1 should be an unsigned 3-bit immediate");
7149
7150      mode1 =  insn_data[icode].operand[1].mode;
7151
7152      if (icode == CODE_FOR_vdiwr_insn)
7153	target = gen_rtx_REG (SImode,
7154			      ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
7155      else if (icode == CODE_FOR_vdowr_insn)
7156	target = gen_rtx_REG (SImode,
7157			      ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
7158      else
7159	gcc_unreachable ();
7160
7161      if (!insn_data[icode].operand[2].predicate (op1, mode1))
7162	op1 = copy_to_mode_reg (mode1, op1);
7163
7164      pat = GEN_FCN (icode) (target, op1);
7165      if (!pat)
7166	return NULL_RTX;
7167
7168      emit_insn (pat);
7169      return NULL_RTX;
7170
7171    case ARC_BUILTIN_VASRW:
7172    case ARC_BUILTIN_VSR8:
7173    case ARC_BUILTIN_VSR8AW:
7174      arg0 = CALL_EXPR_ARG (exp, 0);
7175      arg1 = CALL_EXPR_ARG (exp, 1);
7176      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7177      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7178      op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7179
7180      target = gen_reg_rtx (V8HImode);
7181      mode0 =  insn_data[icode].operand[1].mode;
7182      mode1 =  insn_data[icode].operand[2].mode;
7183
7184      if (!insn_data[icode].operand[1].predicate (op0, mode0))
7185	op0 = copy_to_mode_reg (mode0, op0);
7186
7187      if ((!insn_data[icode].operand[2].predicate (op1, mode1))
7188	  || !(UNSIGNED_INT3 (INTVAL (op1))))
7189	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
7190
7191      pat = GEN_FCN (icode) (target, op0, op1, op2);
7192      if (!pat)
7193	return NULL_RTX;
7194
7195      emit_insn (pat);
7196      return target;
7197
7198    case ARC_BUILTIN_VLD32WH:
7199    case ARC_BUILTIN_VLD32WL:
7200    case ARC_BUILTIN_VLD64:
7201    case ARC_BUILTIN_VLD32:
7202      rtx src_vreg;
7203      icode = d->icode;
7204      arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
7205      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
7206      arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
7207
7208      src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7209      op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7210      op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
7211      op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7212
7213      /* target <- src vreg.  */
7214      emit_insn (gen_move_insn (target, src_vreg));
7215
7216      /* target <- vec_concat: target, mem (Ib, u8).  */
7217      mode0 =  insn_data[icode].operand[3].mode;
7218      mode1 =  insn_data[icode].operand[1].mode;
7219
7220      if ((!insn_data[icode].operand[3].predicate (op0, mode0))
7221	  || !(UNSIGNED_INT3 (INTVAL (op0))))
7222	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
7223
7224      if ((!insn_data[icode].operand[1].predicate (op1, mode1))
7225	  || !(UNSIGNED_INT8 (INTVAL (op1))))
7226	error ("operand 2 should be an unsigned 8-bit value");
7227
7228      pat = GEN_FCN (icode) (target, op1, op2, op0);
7229      if (!pat)
7230	return NULL_RTX;
7231
7232      emit_insn (pat);
7233      return target;
7234
7235    case ARC_BUILTIN_VLD64W:
7236    case ARC_BUILTIN_VLD128:
7237      arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg.  */
7238      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
7239
7240      op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7241      op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7242      op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7243
7244      /* target <- src vreg.  */
7245      target = gen_reg_rtx (V8HImode);
7246
7247      /* target <- vec_concat: target, mem (Ib, u8).  */
7248      mode0 =  insn_data[icode].operand[1].mode;
7249      mode1 =  insn_data[icode].operand[2].mode;
7250      mode2 =  insn_data[icode].operand[3].mode;
7251
7252      if ((!insn_data[icode].operand[2].predicate (op1, mode1))
7253	  || !(UNSIGNED_INT3 (INTVAL (op1))))
7254	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
7255
7256      if ((!insn_data[icode].operand[3].predicate (op2, mode2))
7257	  || !(UNSIGNED_INT8 (INTVAL (op2))))
7258	error ("operand 2 should be an unsigned 8-bit value");
7259
7260      pat = GEN_FCN (icode) (target, op0, op1, op2);
7261
7262      if (!pat)
7263	return NULL_RTX;
7264
7265      emit_insn (pat);
7266      return target;
7267
7268    case ARC_BUILTIN_VST128:
7269    case ARC_BUILTIN_VST64:
7270      arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg.  */
7271      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
7272      arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
7273
7274      op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7275      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7276      op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
7277      op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7278
7279      mode0 = insn_data[icode].operand[0].mode;
7280      mode1 = insn_data[icode].operand[1].mode;
7281      mode2 = insn_data[icode].operand[2].mode;
7282      mode3 = insn_data[icode].operand[3].mode;
7283
7284      if ((!insn_data[icode].operand[1].predicate (op1, mode1))
7285	  || !(UNSIGNED_INT3 (INTVAL (op1))))
7286	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
7287
7288      if ((!insn_data[icode].operand[2].predicate (op2, mode2))
7289	  || !(UNSIGNED_INT8 (INTVAL (op2))))
7290	error ("operand 3 should be an unsigned 8-bit value");
7291
7292      if (!insn_data[icode].operand[3].predicate (op3, mode3))
7293	op3 = copy_to_mode_reg (mode3, op3);
7294
7295      pat = GEN_FCN (icode) (op0, op1, op2, op3);
7296      if (!pat)
7297	return NULL_RTX;
7298
7299      emit_insn (pat);
7300      return NULL_RTX;
7301
7302    case ARC_BUILTIN_VST16_N:
7303    case ARC_BUILTIN_VST32_N:
7304      arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
7305      arg1 = CALL_EXPR_ARG (exp, 1); /* u3.  */
7306      arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7.  */
7307      arg3 = CALL_EXPR_ARG (exp, 3); /* u8.  */
7308
7309      op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL);
7310      op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
7311      op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
7312      op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7313      op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7314
7315      mode0 = insn_data[icode].operand[0].mode;
7316      mode2 = insn_data[icode].operand[2].mode;
7317      mode3 = insn_data[icode].operand[3].mode;
7318      mode4 = insn_data[icode].operand[4].mode;
7319
7320      /* Do some correctness checks for the operands.  */
7321      if ((!insn_data[icode].operand[0].predicate (op0, mode0))
7322	  || !(UNSIGNED_INT8 (INTVAL (op0))))
7323	error ("operand 4 should be an unsigned 8-bit value (0-255)");
7324
7325      if ((!insn_data[icode].operand[2].predicate (op2, mode2))
7326	  || !(UNSIGNED_INT3 (INTVAL (op2))))
7327	error ("operand 3 should be an unsigned 3-bit value (I0-I7)");
7328
7329      if (!insn_data[icode].operand[3].predicate (op3, mode3))
7330	op3 = copy_to_mode_reg (mode3, op3);
7331
7332      if ((!insn_data[icode].operand[4].predicate (op4, mode4))
7333	   || !(UNSIGNED_INT3 (INTVAL (op4))))
7334	error ("operand 2 should be an unsigned 3-bit value (subreg 0-7)");
7335      else if (icode == CODE_FOR_vst32_n_insn
7336	       && ((INTVAL (op4) % 2) != 0))
7337	error ("operand 2 should be an even 3-bit value (subreg 0,2,4,6)");
7338
7339      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
7340      if (!pat)
7341	return NULL_RTX;
7342
7343      emit_insn (pat);
7344      return NULL_RTX;
7345
7346    default:
7347      break;
7348    }
7349
7350  /* 2nd part: Expand regular builtins.  */
7351  if (icode == 0)
7352    internal_error ("bad builtin fcode");
7353
7354  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
7355  j = 0;
7356
7357  if (nonvoid)
7358    {
7359      if (target == NULL_RTX
7360	  || GET_MODE (target) != tmode
7361	  || !insn_data[icode].operand[0].predicate (target, tmode))
7362	{
7363	  target = gen_reg_rtx (tmode);
7364	}
7365      xop[j++] = target;
7366    }
7367
7368  gcc_assert (n_args <= 4);
7369  for (i = 0; i < n_args; i++, j++)
7370    {
7371      tree arg = CALL_EXPR_ARG (exp, i);
7372      machine_mode mode = insn_data[icode].operand[j].mode;
7373      rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
7374      machine_mode opmode = GET_MODE (op);
7375      char c = insn_data[icode].operand[j].constraint[0];
7376
7377      /* SIMD extension requires exact immediate operand match.  */
7378      if ((id > ARC_BUILTIN_SIMD_BEGIN)
7379	  && (id < ARC_BUILTIN_SIMD_END)
7380	  && (c != 'v')
7381	  && (c != 'r'))
7382	{
7383	  if (!CONST_INT_P (op))
7384	    error ("builtin requires an immediate for operand %d", j);
7385	  switch (c)
7386	    {
7387	    case 'L':
7388	      if (!satisfies_constraint_L (op))
7389		error ("operand %d should be a 6-bit unsigned immediate", j);
7390	      break;
7391	    case 'P':
7392	      if (!satisfies_constraint_P (op))
7393		error ("operand %d should be an 8-bit unsigned immediate", j);
7394	      break;
7395	    case 'K':
7396	      if (!satisfies_constraint_K (op))
7397		error ("operand %d should be a 3-bit unsigned immediate", j);
7398	      break;
7399	    default:
7400	      error ("unknown builtin immediate operand type for operand %d",
7401		     j);
7402	    }
7403	}
7404
7405      if (CONST_INT_P (op))
7406	opmode = mode;
7407
7408      if ((opmode == SImode) && (mode == HImode))
7409	{
7410	  opmode = HImode;
7411	  op = gen_lowpart (HImode, op);
7412	}
7413
7414      /* In case the insn wants input operands in modes different from
7415	 the result, abort.  */
7416      gcc_assert (opmode == mode || opmode == VOIDmode);
7417
7418      if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
7419	op = copy_to_mode_reg (mode, op);
7420
7421      xop[j] = op;
7422    }
7423
7424  pat = apply_GEN_FCN (icode, xop);
7425  if (pat == NULL_RTX)
7426    return NULL_RTX;
7427
7428  emit_insn (pat);
7429
7430  if (nonvoid)
7431    return target;
7432  else
7433    return const0_rtx;
7434}
7435
7436/* Return true if operands[OPNO] is a valid compile-time constant to be
7437   used as a register number in builtin code.  Otherwise flag an error
7438   and return false.  */
7439
7440bool
7441check_if_valid_regno_const (rtx *operands, int opno)
7442{
7443
7444  switch (GET_CODE (operands[opno]))
7445    {
7446    case SYMBOL_REF :
7447    case CONST :
7448    case CONST_INT :
7449      return true;
7450    default:
7451	error ("register number must be a compile-time constant.  "
7452	       "Try giving higher optimization levels");
7453	break;
7454    }
7455  return false;
7456}
7457
7458/* Return true if it is ok to make a tail-call to DECL.  */
7459
7460static bool
7461arc_function_ok_for_sibcall (tree decl,
7462			     tree exp ATTRIBUTE_UNUSED)
7463{
7464  tree attrs = NULL_TREE;
7465
7466  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7467  if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
7468    return false;
7469
7470  if (decl)
7471    {
7472      attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7473
7474      if (lookup_attribute ("jli_always", attrs))
7475	return false;
7476      if (lookup_attribute ("jli_fixed", attrs))
7477	return false;
7478      if (lookup_attribute ("secure_call", attrs))
7479	return false;
7480    }
7481
7482  /* Everything else is ok.  */
7483  return true;
7484}
7485
7486/* Output code to add DELTA to the first argument, and then jump
7487   to FUNCTION.  Used for C++ multiple inheritance.  */
7488
7489static void
7490arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7491		     HOST_WIDE_INT delta,
7492		     HOST_WIDE_INT vcall_offset,
7493		     tree function)
7494{
7495  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
7496  int mi_delta = delta;
7497  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
7498  int shift = 0;
7499  int this_regno
7500    = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
7501  rtx fnaddr;
7502
7503  assemble_start_function (thunk, fnname);
7504
7505  if (mi_delta < 0)
7506    mi_delta = - mi_delta;
7507
7508  /* Add DELTA.  When possible use a plain add, otherwise load it into
7509     a register first.  */
7510
7511  while (mi_delta != 0)
7512    {
7513      if ((mi_delta & (3 << shift)) == 0)
7514	shift += 2;
7515      else
7516	{
7517	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
7518		       mi_op, reg_names[this_regno], reg_names[this_regno],
7519		       mi_delta & (0xff << shift));
7520	  mi_delta &= ~(0xff << shift);
7521	  shift += 8;
7522	}
7523    }
7524
7525  /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
7526  if (vcall_offset != 0)
7527    {
7528      /* ld  r12,[this]           --> temp = *this
7529	 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
7530	 ld r12,[r12]
7531	 add this,this,r12        --> this += *(*this + vcall_offset)  */
7532      asm_fprintf (file, "\tld\t%s, [%s]\n",
7533		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
7534      asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
7535		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
7536      asm_fprintf (file, "\tld\t%s, [%s]\n",
7537		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
7538      asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
7539		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
7540    }
7541
7542  fnaddr = XEXP (DECL_RTL (function), 0);
7543
7544  if (arc_is_longcall_p (fnaddr))
7545    {
7546      if (flag_pic)
7547	{
7548	  asm_fprintf (file, "\tld\t%s, [pcl, @",
7549		       ARC_TEMP_SCRATCH_REG);
7550	  assemble_name (file, XSTR (fnaddr, 0));
7551	  fputs ("@gotpc]\n", file);
7552	  asm_fprintf (file, "\tj\t[%s]", ARC_TEMP_SCRATCH_REG);
7553	}
7554      else
7555	{
7556	  fputs ("\tj\t@", file);
7557	  assemble_name (file, XSTR (fnaddr, 0));
7558	}
7559    }
7560  else
7561    {
7562      fputs ("\tb\t@", file);
7563      assemble_name (file, XSTR (fnaddr, 0));
7564      if (flag_pic)
7565	fputs ("@plt\n", file);
7566    }
7567  fputc ('\n', file);
7568  assemble_end_function (thunk, fnname);
7569}
7570
7571/* Return true if a 32-bit "long_call" should be generated for
7572   this call to SYM_REF.  We generate a long_call if the function:
7573
7574        a.  has an __attribute__((long_call))
7575     or b.  the -mlong-calls command line switch has been specified
7576
7577   However we do not generate a long call if the function has an
7578   __attribute__ ((short_call)) or __attribute__ ((medium_call))
7579
7580   This function will be called by C fragments contained in the machine
7581   description file.  */
7582
7583bool
7584arc_is_longcall_p (rtx sym_ref)
7585{
7586  if (GET_CODE (sym_ref) != SYMBOL_REF)
7587    return false;
7588
7589  return (SYMBOL_REF_LONG_CALL_P (sym_ref)
7590	  || (TARGET_LONG_CALLS_SET
7591	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
7592	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
7593
7594}
7595
7596/* Likewise for short calls.  */
7597
7598bool
7599arc_is_shortcall_p (rtx sym_ref)
7600{
7601  if (GET_CODE (sym_ref) != SYMBOL_REF)
7602    return false;
7603
7604  return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
7605	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
7606	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
7607	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
7608
7609}
7610
7611/* Worker function for TARGET_RETURN_IN_MEMORY.  */
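/* Aggregates and addressable types are always returned in memory; other
   types are returned in registers when their size is known and does not
   exceed 8 bytes (16 bytes on ARCv2).  */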
7612
7613static bool
7614arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7615{
7616  if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
7617    return true;
7618  else
7619    {
7620      HOST_WIDE_INT size = int_size_in_bytes (type);
7621      return (size == -1 || size > (TARGET_V2 ? 16 : 8));
7622    }
7623}
7624
7625static bool
7626arc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7627{
7628  return (arg.type != 0
7629	  && (TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST
7630	      || TREE_ADDRESSABLE (arg.type)));
7631}
7632
7633/* Implement TARGET_CAN_USE_DOLOOP_P.  */
7634
7635static bool
7636arc_can_use_doloop_p (const widest_int &,
7637		      const widest_int &iterations_max,
7638		      unsigned int loop_depth, bool entered_at_top)
7639{
7640  /* Considering limitations in the hardware, only use doloop
7641     for innermost loops which must be entered from the top.  */
7642  if (loop_depth > 1 || !entered_at_top)
7643    return false;
7644
7645  /* Check for lp_count width boundary.  */
7646  if (arc_lpcwidth != 32
7647      && (wi::gtu_p (iterations_max, ((1 << arc_lpcwidth) - 1))
7648	  || wi::eq_p (iterations_max, 0)))
7649    return false;
7650  return true;
7651}
7652
7653/* Return NULL if INSN is valid within a low-overhead loop.  Otherwise
7654   return why doloop cannot be applied.  */
7655
7656static const char *
7657arc_invalid_within_doloop (const rtx_insn *insn)
7658{
7659  if (CALL_P (insn))
7660    return "Function call in the loop.";
7661
7662  /* FIXME! add here all the ZOL exceptions.  */
7663  return NULL;
7664}
7665
7666/* Return the next active insn, skipping inline assembly code.  */
7667
7668static rtx_insn *
7669arc_active_insn (rtx_insn *insn)
7670{
7671  while (insn)
7672    {
7673      insn = NEXT_INSN (insn);
7674      if (insn == 0
7675	  || (active_insn_p (insn)
7676	      && NONDEBUG_INSN_P (insn)
7677	      && !NOTE_P (insn)
7678	      && GET_CODE (PATTERN (insn)) != UNSPEC_VOLATILE
7679	      && GET_CODE (PATTERN (insn)) != PARALLEL))
7680	break;
7681    }
7682  return insn;
7683}
7684
7685/* Search for a sequence made out of two stores and a given number of
7686   loads, insert a nop if required.  */
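/* For instance, given a store sequence immediately followed by a load
   (register numbers purely illustrative)

       st  r0,[r2]
       st  r1,[r3]
       ld  r4,[r5]

   two nops are inserted between the last store and the load; if the
   load only shows up one instruction later, a single nop suffices.  */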
7687
7688static void
7689check_store_cacheline_hazard (void)
7690{
7691  rtx_insn *insn, *succ0, *insn1;
7692  bool found = false;
7693
7694  for (insn = get_insns (); insn; insn = arc_active_insn (insn))
7695    {
7696      succ0 = arc_active_insn (insn);
7697
7698      if (!succ0)
7699	return;
7700
7701      if (!single_set (insn))
7702	continue;
7703
7704      if ((get_attr_type (insn) != TYPE_STORE))
7705	continue;
7706
7707      /* Found at least two consecutive stores.  Go to the end of the
7708	 store sequence.  */
7709      for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
7710	if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
7711	  break;
7712
7713      /* Save where we are.  */
7714      succ0 = insn1;
7715
7716      /* Now, check the next two instructions for the following cases:
7717         1. next instruction is a LD => insert 2 nops between store
7718	    sequence and load.
7719	 2. next-next instruction is a LD => insert 1 nop after the store
7720	    sequence.  */
7721      if (insn1 && single_set (insn1)
7722	  && (get_attr_type (insn1) == TYPE_LOAD))
7723	{
7724	  found = true;
7725	  emit_insn_before (gen_nopv (), insn1);
7726	  emit_insn_before (gen_nopv (), insn1);
7727	}
7728      else
7729	{
7730	  if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE))
7731	    {
7732	      /* REG_SAVE_NOTE is used by the Haifa scheduler; we are in
7733		 reorg, so it is safe to reuse it to prevent the current
7734		 compare insn from becoming part of a BRcc
7735		 optimization.  */
7736	      add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
7737	    }
7738	  insn1 = arc_active_insn (insn1);
7739	  if (insn1 && single_set (insn1)
7740	      && (get_attr_type (insn1) == TYPE_LOAD))
7741	    {
7742	      found = true;
7743	      emit_insn_before (gen_nopv (), insn1);
7744	    }
7745	}
7746
7747      if (found)
7748	{
7749	  insn = insn1;
7750	  found = false;
7751	}
7752      else
7753	insn = succ0;
7754    }
7755}
7756
7757/* Return true if a load instruction (CONSUMER) uses the same address as a
7758   store instruction (PRODUCER).  This function is used to avoid st/ld
7759   address hazard in ARC700 cores.  */
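/* E.g. "st r1,[r2]" followed by "ld r3,[r2]" (registers illustrative)
   hits the same address and is therefore reported as a hazard.  */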
7760
7761static bool
7762arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
7763{
7764  rtx in_set, out_set;
7765  rtx out_addr, in_addr;
7766
7767  if (!producer)
7768    return false;
7769
7770  if (!consumer)
7771    return false;
7772
7773  /* Peel the producer and the consumer for the address.  */
7774  out_set = single_set (producer);
7775  if (out_set)
7776    {
7777      out_addr = SET_DEST (out_set);
7778      if (!out_addr)
7779	return false;
7780      if (GET_CODE (out_addr) == ZERO_EXTEND
7781	  || GET_CODE (out_addr) == SIGN_EXTEND)
7782	out_addr = XEXP (out_addr, 0);
7783
7784      if (!MEM_P (out_addr))
7785	return false;
7786
7787      in_set = single_set (consumer);
7788      if (in_set)
7789	{
7790	  in_addr = SET_SRC (in_set);
7791	  if (!in_addr)
7792	    return false;
7793	  if (GET_CODE (in_addr) == ZERO_EXTEND
7794	      || GET_CODE (in_addr) == SIGN_EXTEND)
7795	    in_addr = XEXP (in_addr, 0);
7796
7797	  if (!MEM_P (in_addr))
7798	    return false;
7799	  /* Get rid of the MEM and check if the addresses are
7800	     equivalent.  */
7801	  in_addr = XEXP (in_addr, 0);
7802	  out_addr = XEXP (out_addr, 0);
7803
7804	  return exp_equiv_p (in_addr, out_addr, 0, true);
7805	}
7806    }
7807  return false;
7808}
7809
7810/* Return TRUE if we have a store address hazard.  */
7811
7812bool
7813arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
7814{
7815  if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
7816    return true;
7817  return arc_store_addr_hazard_internal_p (producer, consumer);
7818}
7819
7820/* The same functionality as arc_hazard.  It is called in machine
7821   reorg before any other optimization.  Hence, the NOP size is taken
7822   into account when doing branch shortening.  */
7823
7824static void
7825workaround_arc_anomaly (void)
7826{
7827  rtx_insn *insn, *succ0;
7828
7829  /* For any architecture: call arc_hazard here.  */
7830  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7831    {
7832      succ0 = next_real_insn (insn);
7833      if (arc_hazard (insn, succ0))
7834	{
7835	  emit_insn_before (gen_nopv (), succ0);
7836	}
7837    }
7838
7839  if (!TARGET_ARC700)
7840    return;
7841
7842  /* Old A7 cores suffer from a cache hazard; we need to insert two
7843     nops between any sequence of stores and a load.  */
7844  if (arc_tune != ARC_TUNE_ARC7XX)
7845    check_store_cacheline_hazard ();
7846}
7847
7848/* A callback for the hw-doloop pass.  Called when a loop we have discovered
7849   turns out not to be optimizable; we have to split the loop_end pattern into
7850   a subtract and a test.  */
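/* In the generic fallback this emits the equivalent of

       sub   iter,iter,1
       brne  iter,0,@start_label

   ("iter" standing for the loop's iteration register), while TARGET_DBNZ
   cores can use a single dbnz instruction instead.  */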
7851
7852static void
7853hwloop_fail (hwloop_info loop)
7854{
7855  rtx test;
7856  rtx insn = loop->loop_end;
7857
7858  if (TARGET_DBNZ
7859      && (loop->length && (loop->length <= ARC_MAX_LOOP_LENGTH))
7860      && REG_P (loop->iter_reg))
7861    {
7862      /* TARGET_V2 core3 has dbnz instructions.  */
7863      test = gen_dbnz (loop->iter_reg, loop->start_label);
7864      insn = emit_jump_insn_before (test, loop->loop_end);
7865    }
7866  else if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg) == LP_COUNT))
7867    {
7868      /* We have the lp_count as loop iterator, try to use it.  */
7869      emit_insn_before (gen_loop_fail (), loop->loop_end);
7870      test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG),
7871			 const0_rtx);
7872      test = gen_rtx_IF_THEN_ELSE (VOIDmode, test,
7873				   gen_rtx_LABEL_REF (Pmode, loop->start_label),
7874				   pc_rtx);
7875      insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test),
7876				     loop->loop_end);
7877    }
7878  else
7879    {
7880      emit_insn_before (gen_addsi3 (loop->iter_reg,
7881				    loop->iter_reg,
7882				    constm1_rtx),
7883			loop->loop_end);
7884      test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
7885      insn = emit_jump_insn_before (gen_cbranchsi4 (test,
7886						    loop->iter_reg,
7887						    const0_rtx,
7888						    loop->start_label),
7889				    loop->loop_end);
7890    }
7891  JUMP_LABEL (insn) = loop->start_label;
7892  LABEL_NUSES (loop->start_label)++;
7893  delete_insn (loop->loop_end);
7894}
7895
7896/* Return the next insn after INSN that is not a NOTE, but stop the
7897   search before we enter another basic block.  This routine does not
7898   look inside SEQUENCEs.  */
7899
7900static rtx_insn *
7901next_nonnote_insn_bb (rtx_insn *insn)
7902{
7903  while (insn)
7904    {
7905      insn = NEXT_INSN (insn);
7906      if (insn == 0 || !NOTE_P (insn))
7907	break;
7908      if (NOTE_INSN_BASIC_BLOCK_P (insn))
7909	return NULL;
7910    }
7911
7912  return insn;
7913}
7914
7915/* Optimize LOOP.  */
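/* On success the loop becomes a zero-overhead loop: an arc_lp
   instruction naming START_LABEL and END_LABEL is emitted on the
   fallthru edge into the loop, END_LABEL is placed after the last loop
   instruction, and lp_count serves as the iteration register (the
   original iterator is copied into it when necessary).  */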
7916
7917static bool
7918hwloop_optimize (hwloop_info loop)
7919{
7920  int i;
7921  edge entry_edge;
7922  basic_block entry_bb, bb;
7923  rtx iter_reg;
7924  rtx_insn *insn, *seq, *entry_after, *last_insn, *end_label;
7925  unsigned int length;
7926  bool need_fix = false;
7927  rtx lp_reg = gen_rtx_REG (SImode, LP_COUNT);
7928
7929  if (loop->depth > 1)
7930    {
7931      if (dump_file)
7932	fprintf (dump_file, ";; loop %d is not innermost\n",
7933		 loop->loop_no);
7934      return false;
7935    }
7936
7937  if (!loop->incoming_dest)
7938    {
7939      if (dump_file)
7940	fprintf (dump_file, ";; loop %d has more than one entry\n",
7941		 loop->loop_no);
7942      return false;
7943    }
7944
7945  if (loop->incoming_dest != loop->head)
7946    {
7947      if (dump_file)
7948	fprintf (dump_file, ";; loop %d is not entered from head\n",
7949		 loop->loop_no);
7950      return false;
7951    }
7952
7953  if (loop->has_call || loop->has_asm)
7954    {
7955      if (dump_file)
7956	fprintf (dump_file, ";; loop %d has invalid insn\n",
7957		 loop->loop_no);
7958      return false;
7959    }
7960
7961  /* Scan all the blocks to make sure they don't use iter_reg.  */
7962  if (loop->iter_reg_used || loop->iter_reg_used_outside)
7963    {
7964      if (dump_file)
7965	fprintf (dump_file, ";; loop %d uses iterator\n",
7966		 loop->loop_no);
7967      return false;
7968    }
7969
7970  /* Check if start_label appears before doloop_end.  */
7971  length = 0;
7972  for (insn = loop->start_label;
7973       insn && insn != loop->loop_end;
7974       insn = NEXT_INSN (insn))
7975    {
7976      length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0;
7977      if (JUMP_TABLES_IN_TEXT_SECTION
7978	  && JUMP_TABLE_DATA_P (insn))
7979	{
7980	  if (dump_file)
7981	    fprintf (dump_file, ";; loop %d has a jump table\n",
7982		     loop->loop_no);
7983	  return false;
7984	}
7985    }
7986
7987  if (!insn)
7988    {
7989      if (dump_file)
7990	fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
7991		 loop->loop_no);
7992      return false;
7993    }
7994
7995  loop->length = length;
7996  if (loop->length > ARC_MAX_LOOP_LENGTH)
7997    {
7998      if (dump_file)
7999	fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
8000      return false;
8001    }
8002  else if (!loop->length)
8003    {
8004      if (dump_file)
8005	fprintf (dump_file, ";; loop %d is empty\n", loop->loop_no);
8006      return false;
8007    }
8008
8009  /* Check if we use a register or not.	 */
8010  if (!REG_P (loop->iter_reg))
8011    {
8012      if (dump_file)
8013	fprintf (dump_file, ";; loop %d iterator is MEM\n",
8014		 loop->loop_no);
8015      return false;
8016    }
8017
8027  /* Check if loop register is lpcount.  */
8028  if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT)
8029    {
8030      if (dump_file)
8031        fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
8032		 " iterator\n",
8033                 loop->loop_no);
8034      /* This loop doesn't use lp_count; check whether we can fix
8035	 it.  */
8036      if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT)
8037	  /* In rare cases LP_COUNT may still be live.  */
8038	  || (loop->incoming_src
8039	      && REGNO_REG_SET_P (df_get_live_out (loop->incoming_src),
8040				  LP_COUNT)))
8041	{
8042	  if (dump_file)
8043	    fprintf (dump_file, ";; loop %d, lp_count is alive\n", loop->loop_no);
8044	  return false;
8045	}
8046      else
8047	need_fix = true;
8048    }
8049
8050  /* Check for a control-like instruction as the last instruction of a
8051     ZOL.  */
8052  bb = loop->tail;
8053  last_insn = PREV_INSN (loop->loop_end);
8054
8055  while (1)
8056    {
8057      for (; last_insn != BB_HEAD (bb);
8058	   last_insn = PREV_INSN (last_insn))
8059	if (NONDEBUG_INSN_P (last_insn))
8060	  break;
8061
8062      if (last_insn != BB_HEAD (bb))
8063	break;
8064
8065      if (single_pred_p (bb)
8066	  && single_pred_edge (bb)->flags & EDGE_FALLTHRU
8067	  && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun))
8068	{
8069	  bb = single_pred (bb);
8070	  last_insn = BB_END (bb);
8071	  continue;
8072	}
8073      else
8074	{
8075	  last_insn = NULL;
8076	  break;
8077	}
8078    }
8079
8080  if (!last_insn)
8081    {
8082      if (dump_file)
8083	fprintf (dump_file, ";; loop %d has no last instruction\n",
8084		 loop->loop_no);
8085      return false;
8086    }
8087
8088  if ((TARGET_ARC600_FAMILY || TARGET_HS)
8089      && INSN_P (last_insn)
8090      && (JUMP_P (last_insn) || CALL_P (last_insn)
8091	  || GET_CODE (PATTERN (last_insn)) == SEQUENCE
8092	  /* At this stage we can have (insn (clobber (mem:BLK
8093	     (reg)))) instructions, ignore them.  */
8094	  || (GET_CODE (PATTERN (last_insn)) != CLOBBER
8095	      && (get_attr_type (last_insn) == TYPE_BRCC
8096		  || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT))))
8097    {
8098      if (loop->length + 2 > ARC_MAX_LOOP_LENGTH)
8099	{
8100	  if (dump_file)
8101	    fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
8102	  return false;
8103	}
8104      if (dump_file)
8105	fprintf (dump_file, ";; loop %d has a control like last insn; "
8106		 "add a nop\n",
8107		 loop->loop_no);
8108
8109      last_insn = emit_insn_after (gen_nopv (), last_insn);
8110    }
8111
8112  if (LABEL_P (last_insn))
8113    {
8114      if (dump_file)
8115	fprintf (dump_file, ";; loop %d has a label as last insn; "
8116		 "add a nop\n",
8117		 loop->loop_no);
8118      last_insn = emit_insn_after (gen_nopv (), last_insn);
8119    }
8120
8121  /* REG_SAVE_NOTE is used by the Haifa scheduler.  However, we run
8122     after it, so we can reuse the note to indicate that the last ZOL
8123     instruction cannot be part of a delay slot.  */
8124  add_reg_note (last_insn, REG_SAVE_NOTE, GEN_INT (2));
8125
8126  loop->last_insn = last_insn;
8127
8128  /* Get the loop iteration register.  */
8129  iter_reg = loop->iter_reg;
8130
8131  gcc_assert (REG_P (iter_reg));
8132
8133  entry_edge = NULL;
8134
8135  FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
8136    if (entry_edge->flags & EDGE_FALLTHRU)
8137      break;
8138
8139  if (entry_edge == NULL)
8140    {
8141      if (dump_file)
8142	fprintf (dump_file, ";; loop %d has no fallthru edge jumping "
8143		 "into the loop\n",
8144		 loop->loop_no);
8145      return false;
8146    }
8147  /* The loop is good.  */
8148  end_label = gen_label_rtx ();
8149  loop->end_label = end_label;
8150
8151  /* Place the zero_cost_loop_start instruction before the loop.  */
8152  entry_bb = entry_edge->src;
8153
8154  start_sequence ();
8155
8156  if (need_fix)
8157    {
8158      /* The loop uses an R-register, but lp_count is free, thus
8159	 use lp_count.  */
8160      emit_insn (gen_rtx_SET (lp_reg, iter_reg));
8161      SET_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT);
8162      iter_reg = lp_reg;
8163      if (dump_file)
8164	{
8165	  fprintf (dump_file, ";; fix loop %d to use lp_count\n",
8166		   loop->loop_no);
8167	}
8168    }
8169
8170  insn = emit_insn (gen_arc_lp (loop->start_label,
8171				loop->end_label));
8172
8173  seq = get_insns ();
8174  end_sequence ();
8175
8176  entry_after = BB_END (entry_bb);
8177  if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1
8178      || !entry_after)
8179    {
8180      basic_block new_bb;
8181      edge e;
8182      edge_iterator ei;
8183
8184      emit_insn_before (seq, BB_HEAD (loop->head));
8185      seq = emit_label_before (gen_label_rtx (), seq);
8186      new_bb = create_basic_block (seq, insn, entry_bb);
8187      FOR_EACH_EDGE (e, ei, loop->incoming)
8188	{
8189	  if (!(e->flags & EDGE_FALLTHRU))
8190	    redirect_edge_and_branch_force (e, new_bb);
8191	  else
8192	    redirect_edge_succ (e, new_bb);
8193	}
8194
8195      make_edge (new_bb, loop->head, 0);
8196    }
8197  else
8198    {
8199#if 0
8200      while (DEBUG_INSN_P (entry_after)
8201	     || (NOTE_P (entry_after)
8202		 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK
8203		 /* Make sure we don't split a call and its corresponding
8204		    CALL_ARG_LOCATION note.  */
8205		 && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
8206        entry_after = NEXT_INSN (entry_after);
8207#endif
8208      entry_after = next_nonnote_insn_bb (entry_after);
8209
8210      gcc_assert (entry_after);
8211      emit_insn_before (seq, entry_after);
8212    }
8213
8214  /* Insert the loop end label after the last instruction of the
8215     loop.  */
8216  emit_label_after (end_label, loop->last_insn);
8217  /* Make sure we mark the beginning and end labels as used.  */
8218  LABEL_NUSES (loop->end_label)++;
8219  LABEL_NUSES (loop->start_label)++;
8220
8221  return true;
8222}
8223
8224/* A callback for the hw-doloop pass.  This function examines INSN; if
8225   it is a loop_end pattern we recognize, return the reg rtx for the
8226   loop counter.  Otherwise, return NULL_RTX.  */
8227
8228static rtx
8229hwloop_pattern_reg (rtx_insn *insn)
8230{
8231  rtx reg;
8232
8233  if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
8234    return NULL_RTX;
8235
8236  reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
8237  if (!REG_P (reg))
8238    return NULL_RTX;
8239  return reg;
8240}
8241
8242static struct hw_doloop_hooks arc_doloop_hooks =
8243{
8244  hwloop_pattern_reg,
8245  hwloop_optimize,
8246  hwloop_fail
8247};
8248
8249/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
8250   and tries to rewrite the RTL of these loops so that proper ARC
8251   hardware loops are generated.  */
8252
8253static void
8254arc_reorg_loops (void)
8255{
8256  reorg_loops (true, &arc_doloop_hooks);
8257}
8258
8259/* Scan all calls and add symbols to be emitted in the jli section if
8260   needed.  */
8261
8262static void
8263jli_call_scan (void)
8264{
8265  rtx_insn *insn;
8266
8267  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8268    {
8269      if (!CALL_P (insn))
8270	continue;
8271
8272      rtx pat = PATTERN (insn);
8273      if (GET_CODE (pat) == COND_EXEC)
8274	pat = COND_EXEC_CODE (pat);
8275      pat =  XVECEXP (pat, 0, 0);
8276      if (GET_CODE (pat) == SET)
8277	pat = SET_SRC (pat);
8278
8279      pat = XEXP (XEXP (pat, 0), 0);
8280      if (GET_CODE (pat) == SYMBOL_REF
8281	  && arc_is_jli_call_p (pat))
8282	arc_add_jli_section (pat);
8283    }
8284}
8285
8286/* Add padding if necessary to avoid a mispredict.  A return could
8287   happen immediately after the function start.  A call/return and
8288   return/return must be 6 bytes apart to avoid mispredict.  */
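/* Concretely, when the active instructions preceding a return add up to
   only 2 or 4 bytes (or when there are none at all), nops are inserted
   in front of the return, their length forced via REG_SAVE_NOTE where
   needed, followed by a blockage insn so that delay-slot scheduling does
   not undo the padding.  */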
8289
8290static void
8291pad_return (void)
8292{
8293  rtx_insn *insn;
8294  long offset;
8295
8296  if (!TARGET_PAD_RETURN)
8297    return;
8298
8299  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8300    {
8301      rtx_insn *prev0 = prev_active_insn (insn);
8302      bool wantlong = false;
8303
8304      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
8305	continue;
8306
8307      if (!prev0)
8308	{
8309	  prev0 = emit_insn_before (gen_nopv (), insn);
8310	  /* REG_SAVE_NOTE is used by the Haifa scheduler; we are in reorg,
8311	     so it is safe to reuse it to force a particular length
8312	     for an instruction.  */
8313	  add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
8314	  emit_insn_before (gen_nopv (), insn);
8315	  continue;
8316	}
8317      offset = get_attr_length (prev0);
8318
8319      if (get_attr_length (prev0) == 2
8320	  && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
8321	{
8322	  /* Force long version of the insn.  */
8323	  wantlong = true;
8324	  offset += 2;
8325	}
8326
8327      rtx_insn *prev = prev_active_insn (prev0);
8328      if (prev)
8329	offset += get_attr_length (prev);
8330
8331      prev = prev_active_insn (prev);
8332      if (prev)
8333	offset += get_attr_length (prev);
8334
8335      switch (offset)
8336	{
8337	case 2:
8338	  prev = emit_insn_before (gen_nopv (), insn);
8339	  add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
8340	  break;
8341	case 4:
8342	  emit_insn_before (gen_nopv (), insn);
8343	  break;
8344	default:
8345	  continue;
8346	}
8347
8348      if (wantlong)
8349	add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
8350
8351      /* Emit a blockage to avoid delay slot scheduling.  */
8352      emit_insn_before (gen_blockage (), insn);
8353    }
8354}
8355
8356static int arc_reorg_in_progress = 0;
8357
8358/* ARC's machine-specific reorg function.  */
8359
8360static void
8361arc_reorg (void)
8362{
8363  rtx_insn *insn;
8364  rtx pattern;
8365  rtx pc_target;
8366  long offset;
8367  int changed;
8368
8369  cfun->machine->arc_reorg_started = 1;
8370  arc_reorg_in_progress = 1;
8371
8372  compute_bb_for_insn ();
8373
8374  df_analyze ();
8375
8376  /* Doloop optimization.  */
8377  arc_reorg_loops ();
8378
8379  workaround_arc_anomaly ();
8380  jli_call_scan ();
8381  pad_return ();
8382
8383/* FIXME: should anticipate ccfsm action, generate special patterns for
8384   to-be-deleted branches that have no delay slot and have at least the
8385   length of the size increase forced on other insns that are conditionalized.
8386   This can also have an insn_list inside that enumerates insns which are
8387   not actually conditionalized because the destinations are dead in the
8388   not-execute case.
8389   Could also tag branches that we want to be unaligned if they get no delay
8390   slot, or even ones that we don't want to do delay slot scheduling for
8391   because we can unalign them.
8392
8393   However, there are cases when conditional execution is only possible after
8394   delay slot scheduling:
8395
8396   - If a delay slot is filled with a nocond/set insn from above, the previous
8397     basic block can become eligible for conditional execution.
8398   - If a delay slot is filled with a nocond insn from the fall-through path,
8399     the branch with that delay slot can become eligible for conditional
8400     execution (however, with the same sort of data flow analysis that dbr
8401     does, we could have figured out before that we don't need to
8402     conditionalize this insn.)
8403     - If a delay slot insn is filled with an insn from the target, the
8404       target label gets its uses decremented (even deleted if falling to zero),
8405   thus possibly creating more condexec opportunities there.
8406   Therefore, we should still be prepared to apply condexec optimization on
8407   non-prepared branches if the size increase of conditionalized insns is no
8408   more than the size saved from eliminating the branch.  An invocation option
8409   could also be used to reserve a bit of extra size for condbranches so that
8410   this'll work more often (could also test in arc_reorg if the block is
8411   'close enough' to be eligible for condexec to make this likely, and
8412   estimate required size increase).  */
8413  /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
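  /* E.g. a "cmp r0,r1" followed by an in-range "beq @label" is replaced
     by a single "breq r0,r1,@label" (registers illustrative); a btst
     feeding beq/bne similarly becomes bbit0/bbit1, see below.  */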
8414  if (TARGET_NO_BRCC_SET)
8415    return;
8416
8417  do
8418    {
8419      init_insn_lengths();
8420      changed = 0;
8421
8422      if (optimize > 1 && !TARGET_NO_COND_EXEC)
8423	{
8424	  arc_ifcvt ();
8425	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
8426	  df_finish_pass ((flags & TODO_df_verify) != 0);
8427
8428	  if (dump_file)
8429	    {
8430	      fprintf (dump_file, ";; After if conversion:\n\n");
8431	      print_rtl (dump_file, get_insns ());
8432	    }
8433	}
8434
8435      /* Call shorten_branches to calculate the insn lengths.  */
8436      shorten_branches (get_insns());
8437      cfun->machine->ccfsm_current_insn = NULL_RTX;
8438
8439      if (!INSN_ADDRESSES_SET_P())
8440	  fatal_error (input_location,
8441		       "insn addresses not set after shorten branches");
8442
8443      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8444	{
8445	  rtx label;
8446	  enum attr_type insn_type;
8447
8448	  /* If a non-jump insn (or a casesi jump table), continue.  */
8449	  if (GET_CODE (insn) != JUMP_INSN
8450	      || GET_CODE (PATTERN (insn)) == ADDR_VEC
8451	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
8452	    continue;
8453
8454	  /* If we already have a brcc, note if it is suitable for brcc_s.
8455	     Be a bit generous with the brcc_s range so that we can take
8456	     advantage of any code shortening from delay slot scheduling.  */
8457	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
8458	    {
8459	      rtx pat = PATTERN (insn);
8460	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
8461	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
8462
8463	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
8464	      if ((offset >= -140 && offset < 140)
8465		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
8466		  && compact_register_operand (XEXP (op, 0), VOIDmode)
8467		  && equality_comparison_operator (op, VOIDmode))
8468		PUT_MODE (*ccp, CC_Zmode);
8469	      else if (GET_MODE (*ccp) == CC_Zmode)
8470		PUT_MODE (*ccp, CC_ZNmode);
8471	      continue;
8472	    }
8473	  if ((insn_type =  get_attr_type (insn)) == TYPE_BRCC
8474	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
8475	    continue;
8476
8477	  /* OK. so we have a jump insn.  */
8478	  /* We need to check that it is a bcc.  */
8479	  /* Bcc => set (pc) (if_then_else ) */
8480	  pattern = PATTERN (insn);
8481	  if (GET_CODE (pattern) != SET
8482	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
8483	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
8484	    continue;
8485
8486	  /* Now check if the jump is beyond the s9 range.  */
8487	  if (CROSSING_JUMP_P (insn))
8488	    continue;
8489	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
8490
8491	  if (offset > 253 || offset < -254)
8492	    continue;
8493
8494	  pc_target = SET_SRC (pattern);
8495
8496	  /* Avoid FPU instructions.  */
8497	  if ((GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUmode)
8498	      || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUEmode)
8499	      || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPU_UNEQmode))
8500	    continue;
8501
8502	  /* Now go back and search for the set cc insn.  */
8503
8504	  label = XEXP (pc_target, 1);
8505
8506	    {
8507	      rtx pat;
8508	      rtx_insn *scan, *link_insn = NULL;
8509
8510	      for (scan = PREV_INSN (insn);
8511		   scan && GET_CODE (scan) != CODE_LABEL;
8512		   scan = PREV_INSN (scan))
8513		{
8514		  if (! INSN_P (scan))
8515		    continue;
8516		  pat = PATTERN (scan);
8517		  if (GET_CODE (pat) == SET
8518		      && cc_register (SET_DEST (pat), VOIDmode))
8519		    {
8520		      link_insn = scan;
8521		      break;
8522		    }
8523		}
8524	      if (!link_insn)
8525		continue;
8526	      else
8527		{
8528		  /* Check if this is a data dependency.  */
8529		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
8530		  rtx cmp0, cmp1;
8531
8532		  /* Make sure we can use it for brcc insns.  */
8533		  if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
8534		    continue;
8535
8536		  /* OK, this is the set cc; copy the args here.  */
8537		  op = XEXP (pc_target, 0);
8538
8539		  op0 = cmp0 = XEXP (SET_SRC (pat), 0);
8540		  op1 = cmp1 = XEXP (SET_SRC (pat), 1);
8541		  if (GET_CODE (op0) == ZERO_EXTRACT
8542		      && XEXP (op0, 1) == const1_rtx
8543		      && (GET_CODE (op) == EQ
8544			  || GET_CODE (op) == NE))
8545		    {
8546		      /* btst / b{eq,ne} -> bbit{0,1} */
8547		      op0 = XEXP (cmp0, 0);
8548		      op1 = XEXP (cmp0, 2);
8549		    }
8550		  else if (!register_operand (op0, VOIDmode)
8551			  || !general_operand (op1, VOIDmode))
8552		    continue;
8553		  /* Be careful not to break what cmpsfpx_raw is
8554		     trying to create for checking equality of
8555		     single-precision floats.  */
8556		  else if (TARGET_SPFP
8557			   && GET_MODE (op0) == SFmode
8558			   && GET_MODE (op1) == SFmode)
8559		    continue;
8560
8561		  /* None of the two cmp operands should be set between the
8562		     cmp and the branch.  */
8563		  if (reg_set_between_p (op0, link_insn, insn))
8564		    continue;
8565
8566		  if (reg_set_between_p (op1, link_insn, insn))
8567		    continue;
8568
8569		  /* Since the MODE check does not work, check that this is
8570		     CC reg's last set location before insn, and also no
8571		     instruction between the cmp and branch uses the
8572		     condition codes.  */
8573		  if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
8574		      || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
8575		    continue;
8576
8577		  /* CC reg should be dead after insn.  */
8578		  if (!find_regno_note (insn, REG_DEAD, CC_REG))
8579		    continue;
8580
8581		  op = gen_rtx_fmt_ee (GET_CODE (op),
8582				       GET_MODE (op), cmp0, cmp1);
8583		  /* If we create a LIMM where there was none before,
8584		     we only benefit if we can avoid a scheduling bubble
8585		     for the ARC600.  Otherwise, we'd only forgo chances
8586		     at short insn generation, and risk out-of-range
8587		     branches.  */
8588		  if (!brcc_nolimm_operator (op, VOIDmode)
8589		      && !long_immediate_operand (op1, VOIDmode)
8590		      && (TARGET_ARC700
8591			  || (TARGET_V2 && optimize_size)
8592			  || next_active_insn (link_insn) != insn))
8593		    continue;
8594
8595		  /* Emit bbit / brcc (or brcc_s if possible).
8596		     CC_Zmode indicates that brcc_s is possible.  */
8597
8598		  if (op0 != cmp0)
8599		    cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
8600		  else if ((offset >= -140 && offset < 140)
8601			   && rtx_equal_p (op1, const0_rtx)
8602			   && compact_register_operand (op0, VOIDmode)
8603			   && (GET_CODE (op) == EQ
8604			       || GET_CODE (op) == NE))
8605		    cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
8606		  else
8607		    cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
8608
8609		  brcc_insn
8610		    = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
8611		  brcc_insn = gen_rtx_SET (pc_rtx, brcc_insn);
8612		  cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
8613		  brcc_insn
8614		    = gen_rtx_PARALLEL
8615			(VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
8616		  brcc_insn = emit_jump_insn_before (brcc_insn, insn);
8617
8618		  JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
8619		  note = find_reg_note (insn, REG_BR_PROB, 0);
8620		  if (note)
8621		    {
8622		      XEXP (note, 1) = REG_NOTES (brcc_insn);
8623		      REG_NOTES (brcc_insn) = note;
8624		    }
8625		  note = find_reg_note (link_insn, REG_DEAD, op0);
8626		  if (note)
8627		    {
8628		      remove_note (link_insn, note);
8629		      XEXP (note, 1) = REG_NOTES (brcc_insn);
8630		      REG_NOTES (brcc_insn) = note;
8631		    }
8632		  note = find_reg_note (link_insn, REG_DEAD, op1);
8633		  if (note)
8634		    {
8635		      XEXP (note, 1) = REG_NOTES (brcc_insn);
8636		      REG_NOTES (brcc_insn) = note;
8637		    }
8638
8639		  changed = 1;
8640
8641		  /* Delete the bcc insn.  */
8642		  set_insn_deleted (insn);
8643
8644		  /* Delete the cmp insn.  */
8645		  set_insn_deleted (link_insn);
8646
8647		}
8648	    }
8649	}
8650      /* Clear out insn_addresses.  */
8651      INSN_ADDRESSES_FREE ();
8652
8653    } while (changed);
8654
  if (INSN_ADDRESSES_SET_P ())
8656    fatal_error (input_location, "insn addresses not freed");
8657
8658  arc_reorg_in_progress = 0;
8659}
8660
 /* Check if the operands are valid for BRcc.d generation.
    Valid Brcc.d patterns are
	Brcc.d b, c, s9
	Brcc.d b, u6, s9

    For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed, since it is encoded
    by the assembler as {GE, LT, HS, LS} 64, which does not have a delay
    slot.

    Assumed precondition: the second operand is either a register or a u6
    value.  */
8671
8672bool
8673valid_brcc_with_delay_p (rtx *operands)
8674{
8675  if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
8676    return false;
8677  return brcc_nolimm_operator (operands[0], VOIDmode);
8678}
8679
/* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
   access DECL using gp-relative small-data addressing.  */
8682
8683static bool
8684arc_in_small_data_p (const_tree decl)
8685{
8686  HOST_WIDE_INT size;
8687  tree attr;
8688
  /* Only variables go into the small data area.  */
8690  if (TREE_CODE (decl) != VAR_DECL)
8691    return false;
8692
8693  if (TARGET_NO_SDATA_SET)
8694    return false;
8695
8696  /* Disable sdata references to weak variables.  */
8697  if (DECL_WEAK (decl))
8698    return false;
8699
8700  /* Don't put constants into the small data section: we want them to
8701     be in ROM rather than RAM.  */
8702  if (TREE_READONLY (decl))
8703    return false;
8704
  /* ld.di does not have a gp-relative variant, so to ensure
     -mvolatile-cache works, keep volatile variables out of sdata when the
     volatile cache is disabled.  */
8707  if (!TARGET_VOLATILE_CACHE_SET
8708      && TREE_THIS_VOLATILE (decl))
8709    return false;
8710
8711  /* Likewise for uncached data.  */
8712  attr = TYPE_ATTRIBUTES (TREE_TYPE (decl));
8713  if (lookup_attribute ("uncached", attr))
8714    return false;
8715
  /* And likewise for aux regs.  */
8717  attr = DECL_ATTRIBUTES (decl);
8718  if (lookup_attribute ("aux", attr))
8719    return false;
8720
8721  if (DECL_SECTION_NAME (decl) != 0)
8722    {
8723      const char *name = DECL_SECTION_NAME (decl);
8724      if (strcmp (name, ".sdata") == 0
8725	  || strcmp (name, ".sbss") == 0)
8726	return true;
8727    }
8728  /* If it's not public, there's no need to put it in the small data
8729     section.  */
8730  else if (TREE_PUBLIC (decl))
8731    {
8732      size = int_size_in_bytes (TREE_TYPE (decl));
8733      return (size > 0 && size <= g_switch_value);
8734    }
8735  return false;
8736}
8737
/* Return true if OP is an acceptable memory operand for ARCompact
   16-bit gp-relative load instructions.
   The volatile cache option is still to be handled.  */
8742
8743bool
8744compact_sda_memory_operand (rtx op, machine_mode mode, bool short_p)
8745{
8746  rtx addr;
8747  int size;
8748  int align = 0;
8749  int mask = 0;
8750
8751  /* Eliminate non-memory operations.  */
8752  if (GET_CODE (op) != MEM)
8753    return false;
8754
8755  if (mode == VOIDmode)
8756    mode = GET_MODE (op);
8757
8758  size = GET_MODE_SIZE (mode);
8759
  /* Doubleword operations really emit two instructions, so reject them.  */
8761  if (size > UNITS_PER_WORD)
8762    return false;
8763
8764  /* Decode the address now.  */
8765  addr = XEXP (op, 0);
8766
8767  if (!legitimate_small_data_address_p (addr, mode))
8768    return false;
8769
8770  if (!short_p || size == 1)
8771    return true;
8772
  /* Now check the alignment: the short gp-relative loads require the
     addresses to be aligned.  */
8775  align = get_symbol_alignment (addr);
8776  switch (mode)
8777    {
8778    case E_HImode:
8779      mask = 1;
8780      break;
8781    default:
8782      mask = 3;
8783      break;
8784    }
8785
8786  if (align && ((align & mask) == 0))
8787    return true;
8788  return false;
8789}
8790
8791/* Return TRUE if PAT is accessing an aux-reg.  */
8792
8793static bool
8794arc_is_aux_reg_p (rtx pat)
8795{
8796  tree attrs = NULL_TREE;
8797  tree addr;
8798
8799  if (!MEM_P (pat))
8800    return false;
8801
8802  /* Get the memory attributes.  */
8803  addr = MEM_EXPR (pat);
8804  if (!addr)
8805    return false;
8806
8807  /* Get the attributes.  */
8808  if (TREE_CODE (addr) == VAR_DECL)
8809    attrs = DECL_ATTRIBUTES (addr);
8810  else if (TREE_CODE (addr) == MEM_REF)
8811    attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
8812  else
8813    return false;
8814
8815  if (lookup_attribute ("aux", attrs))
8816    return true;
8817  return false;
8818}
8819
8820/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
8821
8822void
8823arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
8824				   unsigned HOST_WIDE_INT size,
8825				   unsigned HOST_WIDE_INT align,
8826				   unsigned HOST_WIDE_INT globalize_p)
8827{
8828  int in_small_data = arc_in_small_data_p (decl);
8829  rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl);
8830
8831  /* Don't output aux-reg symbols.  */
8832  if (mem != NULL_RTX && MEM_P (mem)
8833      && SYMBOL_REF_P (XEXP (mem, 0))
8834      && arc_is_aux_reg_p (mem))
8835    return;
8836
8837  if (in_small_data)
8838    switch_to_section (get_named_section (NULL, ".sbss", 0));
8840  else
8841    switch_to_section (bss_section);
8842
8843  if (globalize_p)
8844    (*targetm.asm_out.globalize_label) (stream, name);
8845
8846  ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
8847  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8848  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8849  ASM_OUTPUT_LABEL (stream, name);
8850
8851  if (size != 0)
8852    ASM_OUTPUT_SKIP (stream, size);
8853}
8854
/* Return true if IN is a base-plus-offset address whose base is a valid
   base register and whose constant offset is a multiple of 512 (i.e. its
   low nine bits are clear).  */

static bool
arc_preserve_reload_p (rtx in)
8857{
8858  return (GET_CODE (in) == PLUS
8859	  && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
8860	  && CONST_INT_P (XEXP (in, 1))
8861	  && !((INTVAL (XEXP (in, 1)) & 511)));
8862}
8863
8864/* Implement TARGET_REGISTER_MOVE_COST.  */
8865
8866static int
8867arc_register_move_cost (machine_mode,
8868			reg_class_t from_class, reg_class_t to_class)
8869{
8870  /* Force an attempt to 'mov Dy,Dx' to spill.  */
8871  if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP
8872      && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
8873    return 100;
8874
8875  return 2;
8876}
8877
8878/* Emit code for an addsi3 instruction with OPERANDS.
8879   COND_P indicates if this will use conditional execution.
8880   Return the length of the instruction.
8881   If OUTPUT_P is false, don't actually output the instruction, just return
8882   its length.  */
8883int
8884arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
8885{
8886  char format[35];
8887
8888  int match = operands_match_p (operands[0], operands[1]);
8889  int match2 = operands_match_p (operands[0], operands[2]);
8890  int intval = (REG_P (operands[2]) ? 1
8891		: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
8892  int neg_intval = -intval;
8893  int short_0 = satisfies_constraint_Rcq (operands[0]);
8894  int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
8895  int ret = 0;
8896
8897#define REG_H_P(OP) (REG_P (OP) && ((TARGET_V2 && REGNO (OP) <= 31	\
8898				     && REGNO (OP) != 30)		\
8899				    || !TARGET_V2))
8900
8901#define ADDSI_OUTPUT1(FORMAT) do {\
8902  if (output_p) \
8903    output_asm_insn (FORMAT, operands);\
8904  return ret; \
8905} while (0)
8906#define ADDSI_OUTPUT(LIST) do {\
8907  if (output_p) \
8908    sprintf LIST;\
8909  ADDSI_OUTPUT1 (format);\
8910  return ret; \
8911} while (0)
8912
8913  /* First try to emit a 16 bit insn.  */
8914  ret = 2;
8915  if (!cond_p
8916      /* If we are actually about to output this insn, don't try a 16 bit
8917	 variant if we already decided that we don't want that
8918	 (I.e. we upsized this insn to align some following insn.)
8919	 E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM -
8920	 but add1 r0,sp,35 doesn't.  */
8921      && (!output_p || (get_attr_length (current_output_insn) & 2)))
8922    {
8923      /* Generate add_s a,b,c; add_s b,b,u7; add_s c,b,u3; add_s b,b,h
8924	 patterns.  */
8925      if (short_p
8926	  && ((REG_H_P (operands[2])
8927	       && (match || satisfies_constraint_Rcq (operands[2])))
8928	      || (CONST_INT_P (operands[2])
8929		  && ((unsigned) intval <= (match ? 127 : 7)))))
8930	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;1");
8931
8932      /* Generate add_s b,b,h patterns.  */
8933      if (short_0 && match2 && REG_H_P (operands[1]))
8934	ADDSI_OUTPUT1 ("add%? %0,%2,%1 ;2");
8935
8936      /* Generate add_s b,sp,u7; add_s sp,sp,u7 patterns.  */
8937      if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
8938	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
8939	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;3");
8940
8941      if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
8942	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
8943	      && match && !(neg_intval & ~124)))
8944	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2 ;4");
8945
8946      /* Generate add_s h,h,s3 patterns.  */
8947      if (REG_H_P (operands[0]) && match && TARGET_V2
	  && CONST_INT_P (operands[2]) && ((intval >= -1) && (intval <= 6)))
8949	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;5");
8950
8951      /* Generate add_s r0,b,u6; add_s r1,b,u6 patterns.  */
8952      if (TARGET_CODE_DENSITY && REG_P (operands[0]) && REG_P (operands[1])
8953	  && ((REGNO (operands[0]) == 0) || (REGNO (operands[0]) == 1))
8954	  && satisfies_constraint_Rcq (operands[1])
8955	  && satisfies_constraint_L (operands[2]))
8956	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;6");
8957    }
8958
8959  /* Now try to emit a 32 bit insn without long immediate.  */
8960  ret = 4;
8961  if (!match && match2 && REG_P (operands[1]))
8962    ADDSI_OUTPUT1 ("add%? %0,%2,%1");
8963  if (match || !cond_p)
8964    {
8965      int limit = (match && !cond_p) ? 0x7ff : 0x3f;
8966      int range_factor = neg_intval & intval;
8967      int shift;
8968
8969      if (intval == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U << 31))
8970	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
8971
      /* If we can use a straight add / sub instead of a {add,sub}[123] of
	 the same size, do so - the insn latency is lower.  */
      /* -0x800 is a 12-bit constant for add / add3 / sub / sub3, but
	 0x800 is not.  */
8976      if ((intval >= 0 && intval <= limit)
8977	       || (intval == -0x800 && limit == 0x7ff))
8978	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
8979      else if ((intval < 0 && neg_intval <= limit)
8980	       || (intval == 0x800 && limit == 0x7ff))
8981	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
8982      shift = range_factor >= 8 ? 3 : (range_factor >> 1);
8983      gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
8984      gcc_assert ((((1 << shift) - 1) & intval) == 0);
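      /* range_factor is the lowest set bit of intval (for nonzero intval),
	 so SHIFT selects add1 / add2 / add3 (or sub1 / sub2 / sub3), which
	 scale their constant operand by 1, 2 or 3 bit positions.  E.g.
	 (assuming the s12 limit) intval == 0x1004 gives shift == 2 and
	 comes out below as "add2 dst,src,1025", i.e.
	 dst = src + (1025 << 2).  */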
8985      if (((intval < 0 && intval != -0x4000)
8986	   /* sub[123] is slower than add_s / sub, only use it if it
8987	      avoids a long immediate.  */
8988	   && neg_intval <= limit << shift)
8989	  || (intval == 0x4000 && limit == 0x7ff))
8990	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
8991		       shift, neg_intval >> shift));
8992      else if ((intval >= 0 && intval <= limit << shift)
8993	       || (intval == -0x4000 && limit == 0x7ff))
8994	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
8995    }
8996  /* Try to emit a 16 bit opcode with long immediate.  */
8997  ret = 6;
8998  if (short_p && match)
8999    ADDSI_OUTPUT1 ("add%? %0,%1,%2");
9000
  /* We have to use a 32 bit opcode with a long immediate.  */
9002  ret = 8;
9003  ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%2");
9004}
9005
/* Emit code for a commutative_cond_exec instruction with OPERANDS.
9007   Return the length of the instruction.
9008   If OUTPUT_P is false, don't actually output the instruction, just return
9009   its length.  */
9010int
9011arc_output_commutative_cond_exec (rtx *operands, bool output_p)
9012{
9013  enum rtx_code commutative_op = GET_CODE (operands[3]);
9014  const char *pat = NULL;
9015
9016  /* Canonical rtl should not have a constant in the first operand position.  */
9017  gcc_assert (!CONSTANT_P (operands[1]));
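  /* For example, an AND with 255 satisfies constraint C1p and should come
     out as "bmsk dst,src,7" (keep the low eight bits); operands matching
     no special pattern fall through to the generic template below.  */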
9018
9019  switch (commutative_op)
9020    {
9021      case AND:
9022	if (satisfies_constraint_C1p (operands[2]))
9023	  pat = "bmsk%? %0,%1,%Z2";
9024	else if (satisfies_constraint_C2p (operands[2]))
9025	  {
9026	    operands[2] = GEN_INT ((~INTVAL (operands[2])));
9027	    pat = "bmskn%? %0,%1,%Z2";
9028	  }
9029	else if (satisfies_constraint_Ccp (operands[2]))
9030	  pat = "bclr%? %0,%1,%M2";
9031	else if (satisfies_constraint_CnL (operands[2]))
9032	  pat = "bic%? %0,%1,%n2-1";
9033	break;
9034      case IOR:
9035	if (satisfies_constraint_C0p (operands[2]))
9036	  pat = "bset%? %0,%1,%z2";
9037	break;
9038      case XOR:
9039	if (satisfies_constraint_C0p (operands[2]))
9040	  pat = "bxor%? %0,%1,%z2";
9041	break;
9042      case PLUS:
9043	return arc_output_addsi (operands, true, output_p);
9044      default: break;
9045    }
9046  if (output_p)
9047    output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
9048  if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
9049    return 4;
9050  return 8;
9051}
9052
/* Helper function of arc_expand_cpymem.  ADDR points to a chunk of memory.
   Emit code and return a potentially modified address such that offsets
   up to SIZE can be added to yield a legitimate address.
   If REUSE is set, ADDR is a register that may be modified.  */
9057
9058static rtx
9059force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
9060{
9061  rtx base = addr;
9062  rtx offs = const0_rtx;
9063
9064  if (GET_CODE (base) == PLUS)
9065    {
9066      offs = XEXP (base, 1);
9067      base = XEXP (base, 0);
9068    }
9069  if (!REG_P (base)
9070      || (REGNO (base) != STACK_POINTER_REGNUM
9071	  && REGNO_PTR_FRAME_P (REGNO (base)))
9072      || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
9073      || !SMALL_INT (INTVAL (offs) + size))
9074    {
9075      if (reuse)
9076	emit_insn (gen_add2_insn (addr, offs));
9077      else
9078	addr = copy_to_mode_reg (Pmode, addr);
9079    }
9080  return addr;
9081}
9082
9083/* Like move_by_pieces, but take account of load latency, and actual
9084   offset ranges.  Return true on success.  */
9085
9086bool
9087arc_expand_cpymem (rtx *operands)
9088{
9089  rtx dst = operands[0];
9090  rtx src = operands[1];
9091  rtx dst_addr, src_addr;
9092  HOST_WIDE_INT size;
9093  int align = INTVAL (operands[3]);
9094  unsigned n_pieces;
9095  int piece = align;
9096  rtx store[2];
9097  rtx tmpx[2];
9098  int i;
9099
9100  if (!CONST_INT_P (operands[2]))
9101    return false;
9102  size = INTVAL (operands[2]);
9103  /* move_by_pieces_ninsns is static, so we can't use it.  */
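  /* For example, with TARGET_LL64, 32-bit alignment and size == 14 this
     counts three pieces: one 8-byte, one 4-byte and one 2-byte transfer.  */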
9104  if (align >= 4)
9105    {
9106      if (TARGET_LL64)
9107	n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
9108      else
9109	n_pieces = (size + 2) / 4U + (size & 1);
9110    }
9111  else if (align == 2)
9112    n_pieces = (size + 1) / 2U;
9113  else
9114    n_pieces = size;
9115  if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
9116    return false;
  /* Force 32 bit aligned and larger data to use 64 bit transfers, if
     possible.  */
9119  if (TARGET_LL64 && (piece >= 4) && (size >= 8))
9120    piece = 8;
9121  else if (piece > 4)
9122    piece = 4;
9123  dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
9124  src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
9125  store[0] = store[1] = NULL_RTX;
9126  tmpx[0] = tmpx[1] = NULL_RTX;
9127  for (i = 0; size > 0; i ^= 1, size -= piece)
9128    {
9129      rtx tmp;
9130      machine_mode mode;
9131
9132      while (piece > size)
9133	piece >>= 1;
9134      mode = smallest_int_mode_for_size (piece * BITS_PER_UNIT);
9135      /* If we don't re-use temporaries, the scheduler gets carried away,
9136	 and the register pressure gets unnecessarily high.  */
9137      if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
9138	tmp = tmpx[i];
9139      else
9140	tmpx[i] = tmp = gen_reg_rtx (mode);
9141      dst_addr = force_offsettable (dst_addr, piece, 1);
9142      src_addr = force_offsettable (src_addr, piece, 1);
9143      if (store[i])
9144	emit_insn (store[i]);
9145      emit_move_insn (tmp, change_address (src, mode, src_addr));
9146      store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
9147      dst_addr = plus_constant (Pmode, dst_addr, piece);
9148      src_addr = plus_constant (Pmode, src_addr, piece);
9149    }
9150  if (store[i])
9151    emit_insn (store[i]);
9152  if (store[i^1])
9153    emit_insn (store[i^1]);
9154  return true;
9155}
9156
/* If PAT is a MEM whose address decl carries an "aux" attribute with an
   argument, store the aux register number given by that argument in
   *AUXR and return true.  */

static bool
arc_get_aux_arg (rtx pat, int *auxr)
9159{
9160  tree attr, addr = MEM_EXPR (pat);
9161  if (TREE_CODE (addr) != VAR_DECL)
9162    return false;
9163
9164  attr = DECL_ATTRIBUTES (addr);
9165  if (lookup_attribute ("aux", attr))
9166    {
9167      tree arg = TREE_VALUE (attr);
9168      if (arg)
9169	{
9170	  *auxr = TREE_INT_CST_LOW (TREE_VALUE (arg));
9171	  return true;
9172	}
9173    }
9174
9175  return false;
9176}
9177
9178/* Prepare operands for move in MODE.  Return true iff the move has
9179   been emitted.  */
9180
9181bool
9182prepare_move_operands (rtx *operands, machine_mode mode)
9183{
9184  if ((MEM_P (operands[0]) || MEM_P (operands[1]))
9185      && SCALAR_INT_MODE_P (mode))
9186    {
9187      /* First handle aux attribute.  */
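      /* For instance, a declaration such as
	     volatile int cnt __attribute__((aux(0x21)));
	 (the register number 0x21 is just an illustration) makes SImode
	 stores to it expand to the "sr" unspec below and loads to "lr".  */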
9188      if (mode == SImode)
9189	{
9190	  rtx tmp;
9191	  int auxr = 0;
9192	  if (MEM_P (operands[0]) && arc_is_aux_reg_p (operands[0]))
9193	    {
9194	      /* Save operation.  */
9195	      if (arc_get_aux_arg (operands[0], &auxr))
9196		{
9197		  tmp = gen_reg_rtx (SImode);
9198		  emit_move_insn (tmp, GEN_INT (auxr));
9199		}
9200	      else
9201		tmp = XEXP (operands[0], 0);
9202
9203	      operands[1] = force_reg (SImode, operands[1]);
9204	      emit_insn (gen_rtx_UNSPEC_VOLATILE
9205			 (VOIDmode, gen_rtvec (2, operands[1], tmp),
9206			  VUNSPEC_ARC_SR));
9207	      return true;
9208	    }
9209	  if (MEM_P (operands[1]) && arc_is_aux_reg_p (operands[1]))
9210	    {
9211	      if (arc_get_aux_arg (operands[1], &auxr))
9212		{
9213		  tmp = gen_reg_rtx (SImode);
9214		  emit_move_insn (tmp, GEN_INT (auxr));
9215		}
9216	      else
9217		{
9218		  tmp = XEXP (operands[1], 0);
9219		  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9220		}
9221	      /* Load operation.  */
9222	      gcc_assert (REG_P (operands[0]));
9223	      emit_insn (gen_rtx_SET (operands[0],
9224				      gen_rtx_UNSPEC_VOLATILE
9225				      (SImode, gen_rtvec (1, tmp),
9226				       VUNSPEC_ARC_LR)));
9227	      return true;
9228	    }
9229	}
      /* Second, we check for uncached memory accesses.  */
9231      if (arc_is_uncached_mem_p (operands[0]))
9232	{
9233	  if (!REG_P (operands[1]))
9234	    operands[1] = force_reg (mode, operands[1]);
9235	  emit_insn (gen_rtx_UNSPEC_VOLATILE
9236		     (VOIDmode, gen_rtvec (2, operands[0], operands[1]),
9237		      VUNSPEC_ARC_STDI));
9238	  return true;
9239	}
9240      if (arc_is_uncached_mem_p (operands[1]))
9241	{
9242	  rtx tmp = operands[0];
9243
9244	  if (MEM_P (operands[0]))
9245	    tmp = gen_reg_rtx (mode);
9246
9247	  emit_insn (gen_rtx_SET
9248		     (tmp,
9249		      gen_rtx_UNSPEC_VOLATILE
9250		      (mode, gen_rtvec (1, operands[1]),
9251		       VUNSPEC_ARC_LDDI)));
9252	  if (MEM_P (operands[0]))
9253	    {
9254	      operands[1] = tmp;
9255	      return false;
9256	    }
9257	  return true;
9258	}
9259    }
9260
9261  if (GET_CODE (operands[1]) == SYMBOL_REF)
9262    {
9263      enum tls_model model = SYMBOL_REF_TLS_MODEL (operands[1]);
9264      if (MEM_P (operands[0]))
9265	operands[1] = force_reg (mode, operands[1]);
9266      else if (model)
9267	operands[1] = arc_legitimize_tls_address (operands[1], model);
9268    }
9269
9270  operands[1] = arc_legitimize_pic_address (operands[1]);
9271
  /* Store instructions are limited: they only accept as address an
     immediate, a register, or a register plus a small immediate.  */
9274  if (MEM_P (operands[0])
9275      && !move_dest_operand (operands[0], mode))
9276    {
9277      rtx tmp0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
9278      rtx tmp1 = change_address (operands[0], mode, tmp0);
9279      MEM_COPY_ATTRIBUTES (tmp1, operands[0]);
9280      operands[0] = tmp1;
9281    }
9282
  /* Check if it is a constant that has not been legitimized.  */
9284  if (CONSTANT_P (operands[1])
9285      && !arc_legitimate_constant_p (mode, operands[1]))
9286    operands[1] = force_reg (mode, XEXP (operands[1], 0));
9287  else if (MEM_P (operands[0])
9288	   && ((CONSTANT_P (operands[1])
9289		&& !satisfies_constraint_Cm3 (operands[1]))
9290	       || MEM_P (operands[1])))
9291    operands[1] = force_reg (mode, operands[1]);
9292
9293  return false;
9294}
9295
/* Output a library call to a function called FNAME that has been arranged
   to be local to any DSO.  */
9298
9299const char *
9300arc_output_libcall (const char *fname)
9301{
9302  unsigned len = strlen (fname);
9303  static char buf[64];
9304
9305  gcc_assert (len < sizeof buf - 35);
9306  if (TARGET_LONG_CALLS_SET
9307     || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
9308    {
9309      if (flag_pic)
9310	sprintf (buf, "add r12,pcl,@%s@pcl\n\tjl%%!%%* [r12]", fname);
9311      else
9312	sprintf (buf, "jl%%! @%s", fname);
9313    }
9314  else
9315    sprintf (buf, "bl%%!%%* @%s", fname);
9316  return buf;
9317}
9318
9319/* Return the SImode highpart of the DImode value IN.  */
9320
9321rtx
9322disi_highpart (rtx in)
9323{
9324  return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
9325}
9326
/* Return length adjustment for INSN.
   For ARC600:
   A write to a core reg greater than or equal to 32 must not be immediately
   followed by a use.  Anticipate the length requirement to insert a nop
   between PRED and SUCC to prevent a hazard.  */
9332
9333static int
9334arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
9335{
9336  if (!TARGET_ARC600)
9337    return 0;
9338  if (GET_CODE (PATTERN (pred)) == SEQUENCE)
9339    pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
9340  if (GET_CODE (PATTERN (succ)) == SEQUENCE)
9341    succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
9342  if (recog_memoized (pred) == CODE_FOR_mulsi_600
9343      || recog_memoized (pred) == CODE_FOR_umul_600
9344      || recog_memoized (pred) == CODE_FOR_mac_600
9345      || recog_memoized (pred) == CODE_FOR_mul64_600
9346      || recog_memoized (pred) == CODE_FOR_mac64_600
9347      || recog_memoized (pred) == CODE_FOR_umul64_600
9348      || recog_memoized (pred) == CODE_FOR_umac64_600)
9349    return 0;
9350  subrtx_iterator::array_type array;
9351  FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
9352    {
9353      const_rtx x = *iter;
9354      switch (GET_CODE (x))
9355	{
9356	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
9357	  break;
9358	default:
9359	  /* This is also fine for PRE/POST_MODIFY, because they
9360	     contain a SET.  */
9361	  continue;
9362	}
9363      rtx dest = XEXP (x, 0);
9364      /* Check if this sets an extension register.  N.B. we use 61 for the
9365	 condition codes, which is definitely not an extension register.  */
9366      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
9367	  /* Check if the same register is used by the PAT.  */
9368	  && (refers_to_regno_p
9369	      (REGNO (dest),
9370	       REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
9371	       PATTERN (succ), 0)))
9372	return 4;
9373    }
9374  return 0;
9375}
9376
9377/* Given a rtx, check if it is an assembly instruction or not.  */
9378
9379static int
9380arc_asm_insn_p (rtx x)
9381{
9382  int i, j;
9383
9384  if (x == 0)
9385    return 0;
9386
9387  switch (GET_CODE (x))
9388    {
9389    case ASM_OPERANDS:
9390    case ASM_INPUT:
9391      return 1;
9392
9393    case SET:
9394      return arc_asm_insn_p (SET_SRC (x));
9395
9396    case PARALLEL:
9397      j = 0;
9398      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
9399	j += arc_asm_insn_p (XVECEXP (x, 0, i));
      if (j > 0)
9401	return 1;
9402      break;
9403
9404    default:
9405      break;
9406    }
9407
9408  return 0;
9409}
9410
/* For ARC600:
   A write to a core reg greater than or equal to 32 must not be immediately
   followed by a use.  Anticipate the length requirement to insert a nop
   between PRED and SUCC to prevent a hazard.  */
9415
9416int
9417arc_hazard (rtx_insn *pred, rtx_insn *succ)
9418{
9419  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
9420    return 0;
9421
9422  if (TARGET_ARC600)
9423    return arc600_corereg_hazard (pred, succ);
9424
9425  return 0;
9426}
9427
9428/* Return length adjustment for INSN.  */
9429
9430int
9431arc_adjust_insn_length (rtx_insn *insn, int len, bool)
9432{
9433  if (!INSN_P (insn))
9434    return len;
9435  /* We already handle sequences by ignoring the delay sequence flag.  */
9436  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9437    return len;
9438
  /* Check for a return with only one preceding insn since function
     start / call.  */
9441  if (TARGET_PAD_RETURN
9442      && JUMP_P (insn)
9443      && GET_CODE (PATTERN (insn)) != ADDR_VEC
9444      && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9445      && get_attr_type (insn) == TYPE_RETURN)
9446    {
9447      rtx_insn *prev = prev_active_insn (insn);
9448
9449      if (!prev || !(prev = prev_active_insn (prev))
9450	  || ((NONJUMP_INSN_P (prev)
9451	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
9452	      ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
9453			   NON_SIBCALL)
9454	      : CALL_ATTR (prev, NON_SIBCALL)))
9455	return len + 4;
9456    }
9457  if (TARGET_ARC600)
9458    {
9459      rtx_insn *succ = next_real_insn (insn);
9460
      /* On the ARC600, a write to an extension register must be separated
	 from a read.  */
9463      if (succ && INSN_P (succ))
9464	len += arc600_corereg_hazard (insn, succ);
9465    }
9466
9467  /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
9468     can go awry.  */
9469  extract_constrain_insn_cached (insn);
9470
9471  return len;
9472}
9473
9474/* Return a copy of COND from *STATEP, inverted if that is indicated by the
9475   CC field of *STATEP.  */
9476
9477static rtx
9478arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
9479{
9480  rtx cond = statep->cond;
9481  int raw_cc = get_arc_condition_code (cond);
9482  if (reverse)
9483    raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
9484
9485  if (statep->cc == raw_cc)
9486    return copy_rtx (cond);
9487
9488  gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
9489
9490  machine_mode ccm = GET_MODE (XEXP (cond, 0));
9491  enum rtx_code code = reverse_condition (GET_CODE (cond));
9492  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
9493    code = reverse_condition_maybe_unordered (GET_CODE (cond));
9494
9495  return gen_rtx_fmt_ee (code, GET_MODE (cond),
9496			 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
9497}
9498
9499/* Return version of PAT conditionalized with COND, which is part of INSN.
9500   ANNULLED indicates if INSN is an annulled delay-slot insn.
9501   Register further changes if necessary.  */
9502static rtx
9503conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
9504{
9505  /* For commutative operators, we generally prefer to have
9506     the first source match the destination.  */
9507  if (GET_CODE (pat) == SET)
9508    {
9509      rtx src = SET_SRC (pat);
9510
9511      if (COMMUTATIVE_P (src))
9512	{
9513	  rtx src0 = XEXP (src, 0);
9514	  rtx src1 = XEXP (src, 1);
9515	  rtx dst = SET_DEST (pat);
9516
9517	  if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
9518	      /* Leave add_n alone - the canonical form is to
9519		 have the complex summand first.  */
9520	      && REG_P (src0))
9521	    pat = gen_rtx_SET (dst,
9522			       gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
9523					       src1, src0));
9524	}
9525    }
9526
9527  /* dwarf2out.cc:dwarf2out_frame_debug_expr doesn't know
9528     what to do with COND_EXEC.  */
9529  if (RTX_FRAME_RELATED_P (insn))
9530    {
      /* If this is the delay slot insn of an annulled branch,
	 dwarf2out.cc:scan_trace understands the annulling semantics
	 without the COND_EXEC.  */
9534      gcc_assert (annulled);
9535      rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
9536				 REG_NOTES (insn));
9537      validate_change (insn, &REG_NOTES (insn), note, 1);
9538    }
9539  pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
9540  return pat;
9541}
9542
9543/* Use the ccfsm machinery to do if conversion.  */
9544
9545static unsigned
9546arc_ifcvt (void)
9547{
9548  struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
9549
9550  memset (statep, 0, sizeof *statep);
9551  for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
9552    {
9553      arc_ccfsm_advance (insn, statep);
9554
9555      switch (statep->state)
9556	{
9557	case 0:
9558	  break;
9559	case 1: case 2:
9560	  {
9561	    /* Deleted branch.  */
9562	    arc_ccfsm_post_advance (insn, statep);
9563	    gcc_assert (!IN_RANGE (statep->state, 1, 2));
9564	    rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
9565	    if (GET_CODE (PATTERN (seq)) == SEQUENCE)
9566	      {
9567		rtx slot = XVECEXP (PATTERN (seq), 0, 1);
9568		rtx pat = PATTERN (slot);
9569		if (INSN_ANNULLED_BRANCH_P (insn))
9570		  {
9571		    rtx cond
9572		      = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
9573		    pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
9574		  }
9575		if (!validate_change (seq, &PATTERN (seq), pat, 0))
9576		  gcc_unreachable ();
9577		PUT_CODE (slot, NOTE);
9578		NOTE_KIND (slot) = NOTE_INSN_DELETED;
9579	      }
9580	    else
9581	      {
9582		set_insn_deleted (insn);
9583	      }
9584	    continue;
9585	  }
9586	case 3:
9587	  if (LABEL_P (insn)
9588	      && statep->target_label == CODE_LABEL_NUMBER (insn))
9589	    {
9590	      arc_ccfsm_post_advance (insn, statep);
9591	      if (--LABEL_NUSES (insn) == 0)
9592		delete_insn (insn);
9593	      continue;
9594	    }
9595	  /* Fall through.  */
9596	case 4: case 5:
9597	  if (!NONDEBUG_INSN_P (insn))
9598	    break;
9599
9600	  /* Conditionalized insn.  */
9601
9602	  rtx_insn *prev, *pprev;
9603	  rtx *patp, pat, cond;
9604	  bool annulled; annulled = false;
9605
9606	  /* If this is a delay slot insn in a non-annulled branch,
9607	     don't conditionalize it.  N.B., this should be fine for
9608	     conditional return too.  However, don't do this for
9609	     unconditional branches, as these would be encountered when
9610	     processing an 'else' part.  */
9611	  prev = PREV_INSN (insn);
9612	  pprev = PREV_INSN (prev);
9613	  if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
9614	      && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
9615	    {
9616	      if (!INSN_ANNULLED_BRANCH_P (prev))
9617		break;
9618	      annulled = true;
9619	    }
9620
9621	  patp = &PATTERN (insn);
9622	  pat = *patp;
9623	  cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
9624	  if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9625	    {
9626	      /* ??? don't conditionalize if all side effects are dead
9627		 in the not-execute case.  */
9628
9629	      pat = conditionalize_nonjump (pat, cond, insn, annulled);
9630	    }
9631	  else if (simplejump_p (insn))
9632	    {
9633	      patp = &SET_SRC (pat);
9634	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
9635	    }
9636	  else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
9637	    {
9638	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
9639	      pat = gen_rtx_SET (pc_rtx, pat);
9640	    }
9641	  else
9642	    gcc_unreachable ();
9643	  validate_change (insn, patp, pat, 1);
9644	  if (!apply_change_group ())
9645	    gcc_unreachable ();
9646	  if (JUMP_P (insn))
9647	    {
9648	      rtx_insn *next = next_nonnote_insn (insn);
9649	      if (GET_CODE (next) == BARRIER)
9650		delete_insn (next);
9651	      if (statep->state == 3)
9652		continue;
9653	    }
9654	  break;
9655	default:
9656	  gcc_unreachable ();
9657	}
9658      arc_ccfsm_post_advance (insn, statep);
9659    }
9660  return 0;
9661}
9662
9663/* Find annulled delay insns and convert them to use the appropriate predicate.
9664   This allows branch shortening to size up these insns properly.  */
9665
9666static unsigned
9667arc_predicate_delay_insns (void)
9668{
9669  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9670    {
9671      rtx pat, jump, dlay, src, cond, *patp;
9672      int reverse;
9673
9674      if (!NONJUMP_INSN_P (insn)
9675	  || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
9676	continue;
9677      jump = XVECEXP (pat, 0, 0);
9678      dlay = XVECEXP (pat, 0, 1);
9679      if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
9680	continue;
9681      /* If the branch insn does the annulling, leave the delay insn alone.  */
9682      if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
9683	continue;
9684      /* ??? Could also leave DLAY un-conditionalized if its target is dead
9685	 on the other path.  */
9686      gcc_assert (GET_CODE (PATTERN (jump)) == SET);
9687      gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
9688      src = SET_SRC (PATTERN (jump));
9689      gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
9690      cond = XEXP (src, 0);
9691      if (XEXP (src, 2) == pc_rtx)
9692	reverse = 0;
9693      else if (XEXP (src, 1) == pc_rtx)
9694	reverse = 1;
9695      else
9696	gcc_unreachable ();
9697      if (reverse != !INSN_FROM_TARGET_P (dlay))
9698	{
9699	  machine_mode ccm = GET_MODE (XEXP (cond, 0));
9700	  enum rtx_code code = reverse_condition (GET_CODE (cond));
9701	  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
9702	    code = reverse_condition_maybe_unordered (GET_CODE (cond));
9703
9704	  cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
9705				 copy_rtx (XEXP (cond, 0)),
9706				 copy_rtx (XEXP (cond, 1)));
9707	}
9708      else
9709	cond = copy_rtx (cond);
9710      patp = &PATTERN (dlay);
9711      pat = *patp;
9712      pat = conditionalize_nonjump (pat, cond, dlay, true);
9713      validate_change (dlay, patp, pat, 1);
9714      if (!apply_change_group ())
9715	gcc_unreachable ();
9716    }
9717  return 0;
9718}
9719
9720/* For ARC600: If a write to a core reg >=32 appears in a delay slot
9721  (other than of a forward brcc), it creates a hazard when there is a read
9722  of the same register at the branch target.  We can't know what is at the
9723  branch target of calls, and for branches, we don't really know before the
  end of delay slot scheduling, either.  Not only can individual instructions
9725  be hoisted out into a delay slot, a basic block can also be emptied this
9726  way, and branch and/or fall through targets be redirected.  Hence we don't
9727  want such writes in a delay slot.  */
9728
/* Return nonzero iff INSN writes to an extension core register.  */
9730
9731int
9732arc_write_ext_corereg (rtx insn)
9733{
9734  subrtx_iterator::array_type array;
9735  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
9736    {
9737      const_rtx x = *iter;
9738      switch (GET_CODE (x))
9739	{
9740	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
9741	  break;
9742	default:
9743	  /* This is also fine for PRE/POST_MODIFY, because they
9744	     contain a SET.  */
9745	  continue;
9746	}
9747      const_rtx dest = XEXP (x, 0);
9748      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
9749	return 1;
9750    }
9751  return 0;
9752}
9753
9754/* This is like the hook, but returns NULL when it can't / won't generate
9755   a legitimate address.  */
9756
9757static rtx
9758arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9759			  machine_mode mode)
9760{
9761  rtx addr, inner;
9762
9763  addr = x;
9764  if (GET_CODE (addr) == CONST)
9765    addr = XEXP (addr, 0);
9766
9767  if (GET_CODE (addr) == PLUS
9768      && CONST_INT_P (XEXP (addr, 1))
9769      && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
9770	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
9771	  || (REG_P (XEXP (addr, 0))
9772	      && (INTVAL (XEXP (addr, 1)) & 252))))
9773    {
9774      HOST_WIDE_INT offs, upper;
9775      int size = GET_MODE_SIZE (mode);
9776
9777      offs = INTVAL (XEXP (addr, 1));
9778      upper = (offs + 256 * size) & ~511 * size;
9779      inner = plus_constant (Pmode, XEXP (addr, 0), upper);
9780#if 0 /* ??? this produces worse code for EEMBC idctrn01  */
9781      if (GET_CODE (x) == CONST)
9782	inner = gen_rtx_CONST (Pmode, inner);
9783#endif
9784      addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
9785      x = addr;
9786    }
9787  else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
9788    x = force_reg (Pmode, x);
9789  if (memory_address_p ((machine_mode) mode, x))
9790     return x;
9791  return NULL_RTX;
9792}
9793
9794static rtx
9795arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
9796{
9797  rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
9798
9799  if (new_x)
9800    return new_x;
9801  return orig_x;
9802}
9803
9804static rtx
9805arc_delegitimize_address_0 (rtx op)
9806{
9807  switch (GET_CODE (op))
9808    {
9809    case CONST:
9810      return arc_delegitimize_address_0 (XEXP (op, 0));
9811
9812    case UNSPEC:
9813      switch (XINT (op, 1))
9814	{
9815	case ARC_UNSPEC_GOT:
9816	case ARC_UNSPEC_GOTOFFPC:
9817	  return XVECEXP (op, 0, 0);
9818	default:
9819	  break;
9820	}
9821      break;
9822
9823    case PLUS:
9824      {
9825	rtx t1 = arc_delegitimize_address_0 (XEXP (op, 0));
9826	rtx t2 = XEXP (op, 1);
9827
9828	if (t1 && t2)
9829	  return gen_rtx_PLUS (GET_MODE (op), t1, t2);
9830	break;
9831      }
9832
9833    default:
9834      break;
9835    }
9836  return NULL_RTX;
9837}
9838
9839static rtx
9840arc_delegitimize_address (rtx orig_x)
9841{
9842  rtx x = orig_x;
9843
9844  if (MEM_P (x))
9845    x = XEXP (x, 0);
9846
9847  x = arc_delegitimize_address_0 (x);
9848  if (!x)
9849    return orig_x;
9850
9851  if (MEM_P (orig_x))
9852    x = replace_equiv_address_nv (orig_x, x);
9853  return x;
9854}
9855
9856/* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
9857   differ from the hardware register number in order to allow the generic
9858   code to correctly split the concatenation of acc1 and acc2.  */
9859
9860rtx
9861gen_acc1 (void)
9862{
9863  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
9864}
9865
9866/* Return a REG rtx for acc2.  N.B. the gcc-internal representation may
9867   differ from the hardware register number in order to allow the generic
9868   code to correctly split the concatenation of acc1 and acc2.  */
9869
9870rtx
9871gen_acc2 (void)
9872{
9873  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
9874}
9875
9876/* When estimating sizes during arc_reorg, when optimizing for speed, there
9877   are three reasons why we need to consider branches to be length 6:
   - annul-false delay slot insns are implemented using conditional execution,
9879     thus preventing short insn formation where used.
9880   - for ARC600: annul-true delay slot insns are implemented where possible
9881     using conditional execution, preventing short insn formation where used.
9882   - for ARC700: likely or somewhat likely taken branches are made long and
9883     unaligned if possible to avoid branch penalty.  */
9884
9885bool
9886arc_branch_size_unknown_p (void)
9887{
9888  return !optimize_size && arc_reorg_in_progress;
9889}
9890
9891/* The usual; we set up our machine_function data.  */
9892
9893static struct machine_function *
9894arc_init_machine_status (void)
9895{
9896  struct machine_function *machine;
9897  machine = ggc_cleared_alloc<machine_function> ();
9898  machine->fn_type = ARC_FUNCTION_UNKNOWN;
9899
9900  return machine;
9901}
9902
9903/* Implements INIT_EXPANDERS.  We just set up to call the above
9904   function.  */
9905
9906void
9907arc_init_expanders (void)
9908{
9909  init_machine_status = arc_init_machine_status;
9910}
9911
/* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
   indicates a number of elements to ignore - which allows a sibcall
   pattern that starts with (return).  LOAD_P is zero for store multiples
   (for prologues), one for load multiples (for epilogues), and two for
   load multiples where no final clobber of blink is required.
   We also skip the first load / store element since this is supposed to
   be checked in the instruction pattern.  */
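/* For example, a store-multiple parallel (LOAD_P == 0) covering r13-r15
   looks roughly like
     (parallel [(set (mem (reg sp)) (reg r13))
		(set (mem (plus (reg sp) (const_int 4))) (reg r14))
		(set (mem (plus (reg sp) (const_int 8))) (reg r15))
		(clobber (reg blink))])
   and only the elements after the first SET are verified here.  */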
9919
9920int
9921arc_check_millicode (rtx op, int offset, int load_p)
9922{
9923  int len = XVECLEN (op, 0) - offset;
9924  int i;
9925
9926  if (load_p == 2)
9927    {
9928      if (len < 2 || len > 13)
9929	return 0;
9930      load_p = 1;
9931    }
9932  else
9933    {
9934      rtx elt = XVECEXP (op, 0, --len);
9935
9936      if (GET_CODE (elt) != CLOBBER
9937	  || !REG_P (XEXP (elt, 0))
9938	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
9939	  || len < 3 || len > 13)
9940	return 0;
9941    }
9942  for (i = 1; i < len; i++)
9943    {
9944      rtx elt = XVECEXP (op, 0, i + offset);
9945      rtx reg, mem, addr;
9946
9947      if (GET_CODE (elt) != SET)
9948	return 0;
9949      mem = XEXP (elt, load_p);
9950      reg = XEXP (elt, 1-load_p);
9951      if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
9952	return 0;
9953      addr = XEXP (mem, 0);
9954      if (GET_CODE (addr) != PLUS
9955	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
9956	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
9957	return 0;
9958    }
9959  return 1;
9960}
9961
9962/* Accessor functions for cfun->machine->unalign.  */
9963
9964void
9965arc_clear_unalign (void)
9966{
9967  if (cfun)
9968    cfun->machine->unalign = 0;
9969}
9970
9971void
9972arc_toggle_unalign (void)
9973{
9974  cfun->machine->unalign ^= 2;
9975}
9976
/* Operands 0..2 are the operands of an addsi which uses a 12 bit
   constant in operand 2, but which would require a LIMM because of
   operand mismatch.
   Operands 3 and 4 are new SET_SRCs for operand 0.  */
9981
9982void
9983split_addsi (rtx *operands)
9984{
9985  int val = INTVAL (operands[2]);
9986
9987  /* Try for two short insns first.  Lengths being equal, we prefer
9988     expansions with shorter register lifetimes.  */
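  /* E.g. with operand 2 == 200 and a compact destination register this
     yields "dst = 200; dst = dst + src" (presumably a mov_s followed by
     an add_s); otherwise we fall back to "dst = src; dst = dst + 200".  */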
9989  if (val > 127 && val <= 255
9990      && satisfies_constraint_Rcq (operands[0]))
9991    {
9992      operands[3] = operands[2];
9993      operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
9994    }
9995  else
9996    {
9997      operands[3] = operands[1];
9998      operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
9999    }
10000}
10001
/* Operands 0..2 are the operands of a subsi which uses a 12 bit
   constant in operand 1, but which would require a LIMM because of
   operand mismatch.
   Operands 3 and 4 are new SET_SRCs for operand 0.  */
10006
10007void
10008split_subsi (rtx *operands)
10009{
10010  int val = INTVAL (operands[1]);
10011
10012  /* Try for two short insns first.  Lengths being equal, we prefer
10013     expansions with shorter register lifetimes.  */
10014  if (satisfies_constraint_Rcq (operands[0])
10015      && satisfies_constraint_Rcq (operands[2]))
10016    {
10017      if (val >= -31 && val <= 127)
10018	{
10019	  operands[3] = gen_rtx_NEG (SImode, operands[2]);
10020	  operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
10021	  return;
10022	}
10023      else if (val >= 0 && val < 255)
10024	{
10025	  operands[3] = operands[1];
10026	  operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
10027	  return;
10028	}
10029    }
10030  /* If the destination is not an ARCompact16 register, we might
10031     still have a chance to make a short insn if the source is;
10032      we need to start with a reg-reg move for this.  */
10033  operands[3] = operands[2];
10034  operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
10035}
10036
10037/* Handle DOUBLE_REGS uses.
10038   Operand 0: destination register
10039   Operand 1: source register  */
10040
10041static bool
10042arc_process_double_reg_moves (rtx *operands)
10043{
10044  enum usesDxState { none, srcDx, destDx, maxDx };
10045  enum usesDxState state = none;
10046  rtx dest = operands[0];
10047  rtx src  = operands[1];
10048
10049  if (refers_to_regno_p (40, 44, src, 0))
10050    {
10051      state = srcDx;
10052      gcc_assert (REG_P (dest));
10053    }
10054  if (refers_to_regno_p (40, 44, dest, 0))
10055    {
10056      /* Via arc_register_move_cost, we should never see D,D moves.  */
10057      gcc_assert (REG_P (src));
10058      gcc_assert (state == none);
10059      state = destDx;
10060    }
10061
10062  if (state == none)
10063    return false;
10064
10065  if (state == srcDx)
10066    {
10067      /* Without the LR insn, we need to split this into a
10068	 sequence of insns which will use the DEXCLx and DADDHxy
10069	 insns to be able to read the Dx register in question.  */
10070      if (TARGET_DPFP_DISABLE_LRSR)
10071	{
10072	  /* gen *movdf_insn_nolrsr */
10073	  rtx set = gen_rtx_SET (dest, src);
10074	  rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
10075	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
10076	}
10077      else
10078	{
10079	  /* When we have 'mov D, r' or 'mov D, D' then get the target
10080	     register pair for use with LR insn.  */
10081	  rtx destHigh = simplify_gen_subreg (SImode, dest, DFmode,
10082					     TARGET_BIG_ENDIAN ? 0 : 4);
10083	  rtx destLow  = simplify_gen_subreg (SImode, dest, DFmode,
10084					     TARGET_BIG_ENDIAN ? 4 : 0);
10085
10086	  /* Produce the two LR insns to get the high and low parts.  */
10087	  emit_insn (gen_rtx_SET (destHigh,
10088				  gen_rtx_UNSPEC_VOLATILE (Pmode,
10089							   gen_rtvec (1, src),
10090				  VUNSPEC_ARC_LR_HIGH)));
10091	  emit_insn (gen_rtx_SET (destLow,
10092				  gen_rtx_UNSPEC_VOLATILE (Pmode,
10093							   gen_rtvec (1, src),
10094				  VUNSPEC_ARC_LR)));
10095	}
10096    }
10097  else if (state == destDx)
10098    {
10099      /* When we have 'mov r, D' or 'mov D, D' and we have access to the
10100	 LR insn get the target register pair.  */
10101      rtx srcHigh = simplify_gen_subreg (SImode, src, DFmode,
10102					TARGET_BIG_ENDIAN ? 0 : 4);
10103      rtx srcLow  = simplify_gen_subreg (SImode, src, DFmode,
10104					TARGET_BIG_ENDIAN ? 4 : 0);
10105
10106      emit_insn (gen_dexcl_2op (dest, srcHigh, srcLow));
10107    }
10108  else
10109    gcc_unreachable ();
10110
10111  return true;
10112}
10113
10114
10115/* Check if we need to split a 64bit move.  We do not need to split it if we can
10116   use vadd2 or ldd/std instructions.  */
10117
10118bool
10119arc_split_move_p (rtx *operands)
10120{
10121  machine_mode mode = GET_MODE (operands[0]);
10122
10123  if (TARGET_LL64
10124      && ((memory_operand (operands[0], mode)
10125	   && (even_register_operand (operands[1], mode)
10126	       || satisfies_constraint_Cm3 (operands[1])))
10127	  || (memory_operand (operands[1], mode)
10128	      && even_register_operand (operands[0], mode))))
10129    return false;
10130
10131  if (TARGET_PLUS_QMACW
10132      && even_register_operand (operands[0], mode)
10133      && even_register_operand (operands[1], mode))
10134    return false;
10135
10136  return true;
10137}
10138
/* Operands 0..1 are the operands of a 64 bit move instruction.
   Split it into two moves with operands 2/3 and 4/5.  */
10141
10142void
10143arc_split_move (rtx *operands)
10144{
10145  machine_mode mode = GET_MODE (operands[0]);
10146  int i;
10147  int swap = 0;
10148  rtx xop[4];
10149
10150  if (TARGET_DPFP)
10151  {
10152    if (arc_process_double_reg_moves (operands))
10153      return;
10154  }
10155
10156  if (TARGET_PLUS_QMACW
10157      && GET_CODE (operands[1]) == CONST_VECTOR)
10158    {
10159      HOST_WIDE_INT intval0, intval1;
10160      if (GET_MODE (operands[1]) == V2SImode)
10161	{
10162	  intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
10163	  intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
10164	}
10165      else
10166	{
10167	  intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
10168	  intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
10169	  intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
10170	  intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
10171	}
10172      xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
10173      xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
10174      xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
10175      xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
10176      emit_move_insn (xop[0], xop[2]);
10177      emit_move_insn (xop[3], xop[1]);
10178      return;
10179    }
10180
10181  for (i = 0; i < 2; i++)
10182    {
10183      if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
10184	{
10185	  rtx addr = XEXP (operands[i], 0);
10186	  rtx r, o;
10187	  enum rtx_code code;
10188
10189	  gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
10190	  switch (GET_CODE (addr))
10191	    {
10192	    case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
10193	    case PRE_INC: o = GEN_INT (8); goto pre_modify;
10194	    case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
10195	    pre_modify:
10196	      code = PRE_MODIFY;
10197	      break;
10198	    case POST_DEC: o = GEN_INT (-8); goto post_modify;
10199	    case POST_INC: o = GEN_INT (8); goto post_modify;
10200	    case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
10201	    post_modify:
10202	      code = POST_MODIFY;
10203	      swap = 2;
10204	      break;
10205	    default:
10206	      gcc_unreachable ();
10207	    }
10208	  r = XEXP (addr, 0);
10209	  xop[0+i] = adjust_automodify_address_nv
10210		      (operands[i], SImode,
10211		       gen_rtx_fmt_ee (code, Pmode, r,
10212				       gen_rtx_PLUS (Pmode, r, o)),
10213		       0);
10214	  xop[2+i] = adjust_automodify_address_nv
10215		      (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
10216	}
10217      else
10218	{
10219	  xop[0+i] = operand_subword (operands[i], 0, 0, mode);
10220	  xop[2+i] = operand_subword (operands[i], 1, 0, mode);
10221	}
10222    }
10223  if (reg_overlap_mentioned_p (xop[0], xop[3]))
10224    {
10225      swap = 2;
10226      gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
10227    }
10228
10229  emit_move_insn (xop[0 + swap], xop[1 + swap]);
10230  emit_move_insn (xop[2 - swap], xop[3 - swap]);
10231
10232}
10233
10234/* Select between the instruction output templates s_tmpl (for short INSNs)
10235   and l_tmpl (for long INSNs).  */
10236
10237const char *
10238arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
10239{
10240  int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
10241
10242  extract_constrain_insn_cached (insn);
10243  return is_short ? s_tmpl : l_tmpl;
10244}
10245
10246/* Searches X for any reference to REGNO, returning the rtx of the
10247   reference found if any.  Otherwise, returns NULL_RTX.  */
10248
10249rtx
10250arc_regno_use_in (unsigned int regno, rtx x)
10251{
10252  const char *fmt;
10253  int i, j;
10254  rtx tem;
10255
10256  if (REG_P (x) && refers_to_regno_p (regno, x))
10257    return x;
10258
10259  fmt = GET_RTX_FORMAT (GET_CODE (x));
10260  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10261    {
10262      if (fmt[i] == 'e')
10263	{
10264	  if ((tem = regno_use_in (regno, XEXP (x, i))))
10265	    return tem;
10266	}
10267      else if (fmt[i] == 'E')
10268	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10269	  if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
10270	    return tem;
10271    }
10272
10273  return NULL_RTX;
10274}
10275
10276/* Code has a minimum p2 alignment of 1, which we must restore after
10277   an ADDR_DIFF_VEC.  */
10278
10279int
10280arc_label_align (rtx_insn *label)
10281{
10282  if (align_labels.levels[0].log < 1)
10283    {
10284      rtx_insn *next = next_nonnote_nondebug_insn (label);
10285      if (INSN_P (next) && recog_memoized (next) >= 0)
10286	return 1;
10287    }
10288  return align_labels.levels[0].log;
10289}
10290
10291/* Return true if LABEL is in executable code.  */
10292
10293bool
10294arc_text_label (rtx_insn *label)
10295{
10296  rtx_insn *next;
10297
  /* ??? We use deleted labels as if they were still there, see
     gcc.c-torture/compile/20000326-2.c .  */
10300  gcc_assert (GET_CODE (label) == CODE_LABEL
10301	      || (GET_CODE (label) == NOTE
10302		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
10303  next = next_nonnote_insn (label);
10304  if (next)
10305    return (!JUMP_TABLE_DATA_P (next)
10306	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
10307  else if (!PREV_INSN (label))
10308    /* ??? sometimes text labels get inserted very late, see
10309       gcc.dg/torture/stackalign/comp-goto-1.c */
10310    return true;
10311  return false;
10312}
10313
10314/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
10315  when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
10316  -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
10317  to redirect two breqs.  */
10318
10319static bool
10320arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
10321{
10322  /* ??? get_attr_type is declared to take an rtx.  */
10323  union { const rtx_insn *c; rtx_insn *r; } u;
10324
10325  u.c = follower;
10326  if (CROSSING_JUMP_P (followee))
10327    switch (get_attr_type (u.r))
10328      {
10329      case TYPE_BRANCH:
10330	if (get_attr_length (u.r) != 2)
10331	  break;
10332      /*  Fall through. */
10333      case TYPE_BRCC:
10334      case TYPE_BRCC_NO_DELAY_SLOT:
10335	return false;
10336      default:
10337	return true;
10338      }
10339  return true;
10340}
10341
10342
10343/* Implement EPILOGUE_USES.
10344   Return true if REGNO should be added to the deemed uses of the epilogue.
10345
10346   We have to make sure all the register restore instructions are
10347   known to be live in interrupt functions, plus the blink register if
10348   it is clobbered by the isr.  */
10349
10350bool
10351arc_epilogue_uses (int regno)
10352{
10353  unsigned int fn_type;
10354  fn_type = arc_compute_function_type (cfun);
10355
10356  if (regno == arc_tp_regno)
10357    return true;
10358
10359  if (regno == RETURN_ADDR_REGNUM)
10360    return true;
10361
10362  if (regno == arc_return_address_register (fn_type))
10363    return true;
10364
10365  if (epilogue_completed && ARC_INTERRUPT_P (fn_type))
10366    {
10367      /* An interrupt function restores more registers.  */
10368      if (df_regs_ever_live_p (regno) || call_used_or_fixed_reg_p (regno))
10369	return true;
10370    }
10371
10372  return false;
10373}
10374
10375/* Helper for EH_USES macro.  */
10376
10377bool
10378arc_eh_uses (int regno)
10379{
10380  if (regno == arc_tp_regno)
10381    return true;
10382  return false;
10383}
10384
10385/* Return true if we use LRA instead of reload pass.  */
10386
10387bool
10388arc_lra_p (void)
10389{
10390  return arc_lra_flag;
10391}
10392
/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
   Rcq registers, because some insns are shorter with them.  OTOH we already
   have separate alternatives for this purpose, and other insns don't
   mind, so maybe we should rather prefer the other registers?
   We need more data, and we can only get that if we allow people to
   try all options.  */
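
/* A worked illustration of the test used below: the expression
   ((((r & 7) ^ 4) - 4) & 15) == r holds exactly for r0-r3 and
   r12-r15, the registers usable by the 16-bit instructions.  For
   example, with r = 13: (13 & 7) ^ 4 = 1, 1 - 4 = -3, -3 & 15 = 13,
   which equals r; with r = 5: (5 & 7) ^ 4 = 1, 1 - 4 = -3,
   -3 & 15 = 13, which does not equal r.  */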
10399static int
10400arc_register_priority (int r)
10401{
10402  switch (arc_lra_priority_tag)
10403    {
10404    case ARC_LRA_PRIORITY_NONE:
10405      return 0;
10406    case ARC_LRA_PRIORITY_NONCOMPACT:
10407      return ((((r & 7) ^ 4) - 4) & 15) != r;
10408    case ARC_LRA_PRIORITY_COMPACT:
10409      return ((((r & 7) ^ 4) - 4) & 15) == r;
10410    default:
10411      gcc_unreachable ();
10412    }
10413}
10414
10415static reg_class_t
10416arc_spill_class (reg_class_t /* orig_class */, machine_mode)
10417{
10418  return GENERAL_REGS;
10419}
10420
10421bool
10422arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10423			       int itype)
10424{
10425  rtx x = *p;
10426  enum reload_type type = (enum reload_type) itype;
10427
10428  if (GET_CODE (x) == PLUS
10429      && CONST_INT_P (XEXP (x, 1))
10430      && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
10431	  || (REG_P (XEXP (x, 0))
10432	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
10433    {
10434      int scale = GET_MODE_SIZE (mode);
10435      int shift;
10436      rtx index_rtx = XEXP (x, 1);
10437      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
10438      rtx reg, sum, sum2;
10439
10440      if (scale > 4)
10441	scale = 4;
10442      if ((scale-1) & offset)
10443	scale = 1;
10444      shift = scale >> 1;
10445      offset_base
10446	= ((offset + (256 << shift))
10447	   & ((HOST_WIDE_INT)((unsigned HOST_WIDE_INT) -512 << shift)));
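      /* Worked example (illustrative): for an SImode access with
         offset = 5000, scale = 4 and shift = 2, offset_base
         = (5000 + 1024) & -2048 = 4096.  The reload below then puts
         base + 4096 into a register and the access uses the small
         remaining offset 5000 - 4096 = 904.  */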
10448      /* Sometimes the normal form does not suit DImode.  We
10449	 could avoid that by using smaller ranges, but that
10450	 would give less optimized code when SImode is
10451	 prevalent.  */
10452      if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
10453	{
10454	  int regno;
10455
10456	  reg = XEXP (x, 0);
10457	  regno = REGNO (reg);
10458	  sum2 = sum = plus_constant (Pmode, reg, offset_base);
10459
10460	  if (reg_equiv_constant (regno))
10461	    {
10462	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
10463				    offset_base);
10464	      if (GET_CODE (sum2) == PLUS)
10465		sum2 = gen_rtx_CONST (Pmode, sum2);
10466	    }
10467	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
10468	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
10469		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
10470		       type);
10471	  return true;
10472	}
10473    }
10474  /* We must re-recognize what we created before.  */
10475  else if (GET_CODE (x) == PLUS
10476	   && GET_CODE (XEXP (x, 0)) == PLUS
10477	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10478	   && REG_P  (XEXP (XEXP (x, 0), 0))
10479	   && CONST_INT_P (XEXP (x, 1)))
10480    {
10481      /* Because this address is so complex, we know it must have
10482	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10483	 it is already unshared, and needs no further unsharing.  */
10484      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10485		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10486      return true;
10487    }
10488  return false;
10489}
10490
10491/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
10492
10493static bool
10494arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10495				    unsigned int align,
10496				    enum by_pieces_operation op,
10497				    bool speed_p)
10498{
10499  /* Let the cpymem expander handle small block moves.  */
10500  if (op == MOVE_BY_PIECES)
10501    return false;
10502
10503  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10504}
10505
10506/* Emit a (pre) memory barrier around an atomic sequence according to
10507   MODEL.  */
10508
10509static void
10510arc_pre_atomic_barrier (enum memmodel model)
10511{
10512  if (need_atomic_barrier_p (model, true))
10513    emit_insn (gen_memory_barrier ());
10514}
10515
10516/* Emit a (post) memory barrier around an atomic sequence according to
10517   MODEL.  */
10518
10519static void
10520arc_post_atomic_barrier (enum memmodel model)
10521{
10522  if (need_atomic_barrier_p (model, false))
10523    emit_insn (gen_memory_barrier ());
10524}
10525
10526/* Expand a compare and swap pattern.  */
10527
10528static void
10529emit_unlikely_jump (rtx insn)
10530{
10531  rtx_insn *jump = emit_jump_insn (insn);
10532  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
10533}
10534
/* Expand code to perform an 8- or 16-bit compare and swap by doing a
   32-bit compare and swap on the word containing the byte or
   half-word.  The difference between a weak and a strong CAS is that
   the weak version may simply fail.  The strong version relies on two
   loops: one checks whether the SCOND op succeeded or not, the other
   checks that the accessed 32-bit location containing the 8- or
   16-bit datum has not been changed by another thread.  The first
   loop is implemented by the atomic_compare_and_swapsi_1 pattern.
   The second loop is implemented by this routine.  */
10544
10545static void
10546arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
10547				rtx oldval, rtx newval, rtx weak,
10548				rtx mod_s, rtx mod_f)
10549{
10550  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
10551  rtx addr = gen_reg_rtx (Pmode);
10552  rtx off = gen_reg_rtx (SImode);
10553  rtx oldv = gen_reg_rtx (SImode);
10554  rtx newv = gen_reg_rtx (SImode);
10555  rtx oldvalue = gen_reg_rtx (SImode);
10556  rtx newvalue = gen_reg_rtx (SImode);
10557  rtx res = gen_reg_rtx (SImode);
10558  rtx resv = gen_reg_rtx (SImode);
10559  rtx memsi, val, mask, end_label, loop_label, cc, x;
10560  machine_mode mode;
10561  bool is_weak = (weak != const0_rtx);
10562
  /* Truncate the address to a 32-bit boundary.  */
10564  emit_insn (gen_rtx_SET (addr,
10565			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
10566
10567  /* Compute the datum offset.  */
10568  emit_insn (gen_rtx_SET (off,
10569			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
10570  if (TARGET_BIG_ENDIAN)
10571    emit_insn (gen_rtx_SET (off,
10572			    gen_rtx_MINUS (SImode,
10573					   (GET_MODE (mem) == QImode) ?
10574					   GEN_INT (3) : GEN_INT (2), off)));
10575
10576  /* Normal read from truncated address.  */
10577  memsi = gen_rtx_MEM (SImode, addr);
10578  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
10579  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
10580
10581  val = copy_to_reg (memsi);
10582
  /* Convert the offset to bits.  */
10584  emit_insn (gen_rtx_SET (off,
10585			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
10586
10587  /* Get the proper mask.  */
10588  if (GET_MODE (mem) == QImode)
10589    mask = force_reg (SImode, GEN_INT (0xff));
10590  else
10591    mask = force_reg (SImode, GEN_INT (0xffff));
10592
10593  emit_insn (gen_rtx_SET (mask,
10594			  gen_rtx_ASHIFT (SImode, mask, off)));
10595
10596  /* Prepare the old and new values.  */
10597  emit_insn (gen_rtx_SET (val,
10598			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
10599				       val)));
10600
10601  oldval = gen_lowpart (SImode, oldval);
10602  emit_insn (gen_rtx_SET (oldv,
10603			  gen_rtx_ASHIFT (SImode, oldval, off)));
10604
10605  newval = gen_lowpart_common (SImode, newval);
10606  emit_insn (gen_rtx_SET (newv,
10607			  gen_rtx_ASHIFT (SImode, newval, off)));
10608
10609  emit_insn (gen_rtx_SET (oldv,
10610			  gen_rtx_AND (SImode, oldv, mask)));
10611
10612  emit_insn (gen_rtx_SET (newv,
10613			  gen_rtx_AND (SImode, newv, mask)));
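
  /* Illustration (assuming a little-endian QImode CAS at byte offset
     1 of the word; the values are purely for exposition): off = 8,
     mask = 0xff00, val = word & ~0xff00, oldv = (oldval << 8) & 0xff00
     and newv = (newval << 8) & 0xff00; the 32-bit CAS below then
     operates on oldv | val versus newv | val.  */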
10614
10615  if (!is_weak)
10616    {
10617      end_label = gen_label_rtx ();
10618      loop_label = gen_label_rtx ();
10619      emit_label (loop_label);
10620    }
10621
10622  /* Make the old and new values.  */
10623  emit_insn (gen_rtx_SET (oldvalue,
10624			  gen_rtx_IOR (SImode, oldv, val)));
10625
10626  emit_insn (gen_rtx_SET (newvalue,
10627			  gen_rtx_IOR (SImode, newv, val)));
10628
  /* Try a 32-bit atomic compare and swap.  It clobbers the CC
     register.  */
10631  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
10632					      weak, mod_s, mod_f));
10633
10634  /* Regardless of the weakness of the operation, a proper boolean
10635     result needs to be provided.  */
10636  x = gen_rtx_REG (CC_Zmode, CC_REG);
10637  x = gen_rtx_EQ (SImode, x, const0_rtx);
10638  emit_insn (gen_rtx_SET (bool_result, x));
10639
10640  if (!is_weak)
10641    {
      /* Check the result: if the atomic op succeeded, jump to the
         end label.  */
10644      x = gen_rtx_REG (CC_Zmode, CC_REG);
10645      x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
10646      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10647				gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
10648      emit_jump_insn (gen_rtx_SET (pc_rtx, x));
10649
10650      /* Wait for the right moment when the accessed 32-bit location
10651	 is stable.  */
10652      emit_insn (gen_rtx_SET (resv,
10653			      gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
10654					   res)));
10655      mode = SELECT_CC_MODE (NE, resv, val);
10656      cc = gen_rtx_REG (mode, CC_REG);
10657      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
10658
      /* Set the new value of the 32-bit location, properly masked.  */
10660      emit_insn (gen_rtx_SET (val, resv));
10661
10662      /* Try again if location is unstable.  Fall through if only
10663	 scond op failed.  */
10664      x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
10665      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10666				gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
10667      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10668
10669      emit_label (end_label);
10670    }
10671
  /* Finally, properly return the result for the given mode.  */
10673  emit_insn (gen_rtx_SET (res,
10674			  gen_rtx_AND (SImode, res, mask)));
10675
10676  emit_insn (gen_rtx_SET (res,
10677			  gen_rtx_LSHIFTRT (SImode, res, off)));
10678
10679  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
10680}
10681
10682/* Helper function used by "atomic_compare_and_swap" expand
10683   pattern.  */
10684
10685void
10686arc_expand_compare_and_swap (rtx operands[])
10687{
10688  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
10689  machine_mode mode;
10690
10691  bval = operands[0];
10692  rval = operands[1];
10693  mem = operands[2];
10694  oldval = operands[3];
10695  newval = operands[4];
10696  is_weak = operands[5];
10697  mod_s = operands[6];
10698  mod_f = operands[7];
10699  mode = GET_MODE (mem);
10700
10701  if (reg_overlap_mentioned_p (rval, oldval))
10702    oldval = copy_to_reg (oldval);
10703
10704  if (mode == SImode)
10705    {
10706      emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
10707						  is_weak, mod_s, mod_f));
10708      x = gen_rtx_REG (CC_Zmode, CC_REG);
10709      x = gen_rtx_EQ (SImode, x, const0_rtx);
10710      emit_insn (gen_rtx_SET (bval, x));
10711    }
10712  else
10713    {
10714      arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
10715				      is_weak, mod_s, mod_f);
10716    }
10717}
10718
10719/* Helper function used by the "atomic_compare_and_swapsi_1"
10720   pattern.  */
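
/* For the strong variant, the sequence emitted below is roughly
   (an illustrative sketch, assuming the load/store-exclusive
   patterns expand to the LLOCK and SCOND instructions):

     .L1: llock  rval, [mem]
          brne   rval, oldval, .L2
          scond  newval, [mem]
          bne    .L1
     .L2:

   The weak variant omits .L1 and the retry branch.  */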
10721
10722void
10723arc_split_compare_and_swap (rtx operands[])
10724{
10725  rtx rval, mem, oldval, newval;
10726  machine_mode mode;
10727  enum memmodel mod_s, mod_f;
10728  bool is_weak;
10729  rtx label1, label2, x, cond;
10730
10731  rval = operands[0];
10732  mem = operands[1];
10733  oldval = operands[2];
10734  newval = operands[3];
10735  is_weak = (operands[4] != const0_rtx);
10736  mod_s = (enum memmodel) INTVAL (operands[5]);
10737  mod_f = (enum memmodel) INTVAL (operands[6]);
10738  mode = GET_MODE (mem);
10739
10740  /* ARC atomic ops work only with 32-bit aligned memories.  */
10741  gcc_assert (mode == SImode);
10742
10743  arc_pre_atomic_barrier (mod_s);
10744
10745  label1 = NULL_RTX;
10746  if (!is_weak)
10747    {
10748      label1 = gen_label_rtx ();
10749      emit_label (label1);
10750    }
10751  label2 = gen_label_rtx ();
10752
10753  /* Load exclusive.  */
10754  emit_insn (gen_arc_load_exclusivesi (rval, mem));
10755
10756  /* Check if it is oldval.  */
10757  mode = SELECT_CC_MODE (NE, rval, oldval);
10758  cond = gen_rtx_REG (mode, CC_REG);
10759  emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
10760
10761  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10762  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10763			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
10764  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10765
10766  /* Exclusively store new item.  Store clobbers CC reg.  */
10767  emit_insn (gen_arc_store_exclusivesi (mem, newval));
10768
10769  if (!is_weak)
10770    {
10771      /* Check the result of the store.  */
10772      cond = gen_rtx_REG (CC_Zmode, CC_REG);
10773      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10774      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10775				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
10776      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10777    }
10778
10779  if (mod_f != MEMMODEL_RELAXED)
10780    emit_label (label2);
10781
10782  arc_post_atomic_barrier (mod_s);
10783
10784  if (mod_f == MEMMODEL_RELAXED)
10785    emit_label (label2);
10786}
10787
10788/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
10789   to perform.  MEM is the memory on which to operate.  VAL is the second
10790   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
10792   is a CONST_INT containing the memory model to use.  */
10793
10794void
10795arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
10796			 rtx orig_before, rtx orig_after, rtx model_rtx)
10797{
10798  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
10799  machine_mode mode = GET_MODE (mem);
10800  rtx label, x, cond;
10801  rtx before = orig_before, after = orig_after;
10802
10803  /* ARC atomic ops work only with 32-bit aligned memories.  */
10804  gcc_assert (mode == SImode);
10805
10806  arc_pre_atomic_barrier (model);
10807
10808  label = gen_label_rtx ();
10809  emit_label (label);
10810  label = gen_rtx_LABEL_REF (VOIDmode, label);
10811
10812  if (before == NULL_RTX)
10813    before = gen_reg_rtx (mode);
10814
10815  if (after == NULL_RTX)
10816    after = gen_reg_rtx (mode);
10817
10818  /* Load exclusive.  */
10819  emit_insn (gen_arc_load_exclusivesi (before, mem));
10820
10821  switch (code)
10822    {
10823    case NOT:
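      /* The NOT code implements an atomic NAND: AFTER = ~(BEFORE & VAL).  */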
10824      x = gen_rtx_AND (mode, before, val);
10825      emit_insn (gen_rtx_SET (after, x));
10826      x = gen_rtx_NOT (mode, after);
10827      emit_insn (gen_rtx_SET (after, x));
10828      break;
10829
10830    case MINUS:
10831      if (CONST_INT_P (val))
10832	{
10833	  val = GEN_INT (-INTVAL (val));
10834	  code = PLUS;
10835	}
10836
10837      /* FALLTHRU.  */
10838    default:
10839      x = gen_rtx_fmt_ee (code, mode, before, val);
10840      emit_insn (gen_rtx_SET (after, x));
10841      break;
10842   }
10843
10844  /* Exclusively store new item.  Store clobbers CC reg.  */
10845  emit_insn (gen_arc_store_exclusivesi (mem, after));
10846
10847  /* Check the result of the store.  */
10848  cond = gen_rtx_REG (CC_Zmode, CC_REG);
10849  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10850  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10851			    label, pc_rtx);
10852  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10853
10854  arc_post_atomic_barrier (model);
10855}
10856
10857/* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P.  */
10858
10859static bool
10860arc_no_speculation_in_delay_slots_p ()
10861{
10862  return true;
10863}
10864
10865/* Return a parallel of registers to represent where to find the
10866   register pieces if required, otherwise NULL_RTX.  */
10867
10868static rtx
10869arc_dwarf_register_span (rtx rtl)
10870{
10871   machine_mode mode = GET_MODE (rtl);
10872   unsigned regno;
10873   rtx p;
10874
10875   if (GET_MODE_SIZE (mode) != 8)
10876     return NULL_RTX;
10877
10878   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
10879   regno = REGNO (rtl);
10880   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
10881   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
10882
10883   return p;
10884}
10885
10886/* Return true if OP is an acceptable memory operand for ARCompact
10887   16-bit load instructions of MODE.
10888
10889   AV2SHORT: TRUE if address needs to fit into the new ARCv2 short
   non-scaled instructions.
10891
10892   SCALED: TRUE if address can be scaled.  */
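
/* For reference (summarizing the checks below): the classic 16-bit
   forms accept unsigned offsets of 0..31 for byte accesses, 0..62
   (even) for halfword accesses and 0..124 (multiple of 4) for word
   accesses; sp-relative word accesses likewise accept 0..124 in
   multiples of 4.  */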
10893
10894bool
10895compact_memory_operand_p (rtx op, machine_mode mode,
10896			  bool av2short, bool scaled)
10897{
10898  rtx addr, plus0, plus1;
10899  int size, off;
10900
10901  /* Eliminate non-memory operations.  */
10902  if (GET_CODE (op) != MEM)
10903    return 0;
10904
10905  /* .di instructions have no 16-bit form.  */
10906  if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
10907    return false;
10908
  /* Likewise for uncached types.  */
10910  if (arc_is_uncached_mem_p (op))
10911    return false;
10912
10913  if (mode == VOIDmode)
10914    mode = GET_MODE (op);
10915
10916  size = GET_MODE_SIZE (mode);
10917
10918  /* dword operations really put out 2 instructions, so eliminate
10919     them.  */
10920  if (size > UNITS_PER_WORD)
10921    return false;
10922
10923  /* Decode the address now.  */
10924  addr = XEXP (op, 0);
10925  switch (GET_CODE (addr))
10926    {
10927    case REG:
10928      return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
10929	      || COMPACT_GP_REG_P (REGNO (addr))
10930	      || (SP_REG_P (REGNO (addr)) && (size != 2)));
10931    case PLUS:
10932      plus0 = XEXP (addr, 0);
10933      plus1 = XEXP (addr, 1);
10934
10935      if ((GET_CODE (plus0) == REG)
10936	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
10937	      || COMPACT_GP_REG_P (REGNO (plus0)))
10938	  && ((GET_CODE (plus1) == REG)
10939	      && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
10940		  || COMPACT_GP_REG_P (REGNO (plus1)))))
10941	{
10942	  return !av2short;
10943	}
10944
10945      if ((GET_CODE (plus0) == REG)
10946	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
10947	      || (COMPACT_GP_REG_P (REGNO (plus0)) && !av2short)
10948	      || (IN_RANGE (REGNO (plus0), 0, 31) && av2short))
10949	  && (GET_CODE (plus1) == CONST_INT))
10950	{
10951	  bool valid = false;
10952
10953	  off = INTVAL (plus1);
10954
10955	  /* Negative offset is not supported in 16-bit load/store insns.  */
10956	  if (off < 0)
10957	    return 0;
10958
10959	  /* Only u5 immediates allowed in code density instructions.  */
10960	  if (av2short)
10961	    {
10962	      switch (size)
10963		{
10964		case 1:
10965		  return false;
10966		case 2:
10967		  /* This is an ldh_s.x instruction, check the u6
10968		     immediate.  */
10969		  if (COMPACT_GP_REG_P (REGNO (plus0)))
10970		    valid = true;
10971		  break;
10972		case 4:
10973		  /* Only u5 immediates allowed in 32bit access code
10974		     density instructions.  */
10975		  if (REGNO (plus0) <= 31)
10976		    return ((off < 32) && (off % 4 == 0));
10977		  break;
10978		default:
10979		  return false;
10980		}
10981	    }
10982	  else
10983	    if (COMPACT_GP_REG_P (REGNO (plus0)))
10984	      valid = true;
10985
10986	  if (valid)
10987	    {
10988
10989	      switch (size)
10990		{
10991		case 1:
10992		  return (off < 32);
10993		case 2:
		  /* The 6-bit constant gets shifted to fit the real
		     5-bit field.  Also check the alignment.  */
10996		  return ((off < 64) && (off % 2 == 0));
10997		case 4:
10998		  return ((off < 128) && (off % 4 == 0));
10999		default:
11000		  return false;
11001		}
11002	    }
11003	}
11004
11005      if (REG_P (plus0) && CONST_INT_P (plus1)
11006	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
11007	      || SP_REG_P (REGNO (plus0)))
11008	  && !av2short)
11009	{
11010	  off = INTVAL (plus1);
11011	  return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
11012	}
11013
11014      if ((GET_CODE (plus0) == MULT)
11015	  && (GET_CODE (XEXP (plus0, 0)) == REG)
11016	  && ((REGNO (XEXP (plus0, 0)) >= FIRST_PSEUDO_REGISTER)
11017	      || COMPACT_GP_REG_P (REGNO (XEXP (plus0, 0))))
11018	  && (GET_CODE (plus1) == REG)
11019	  && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
11020	      || COMPACT_GP_REG_P (REGNO (plus1))))
11021	return scaled;
    default:
      /* TODO: 'gp' and 'pcl' are to be supported as base address
         operands for 16-bit load instructions.  */
      break;
11026    }
11027  return false;
11028}
11029
/* Return nonzero if a jli call should be generated for a call from
   the current function to the function referenced by the symbol PAT.  */
11032
11033bool
11034arc_is_jli_call_p (rtx pat)
11035{
11036  tree attrs;
11037  tree decl = SYMBOL_REF_DECL (pat);
11038
  /* If it is not a well-defined public function, return false.  */
11040  if (!decl || !SYMBOL_REF_FUNCTION_P (pat) || !TREE_PUBLIC (decl))
11041    return false;
11042
11043  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
11044  if (lookup_attribute ("jli_always", attrs))
11045    return true;
11046
11047  if (lookup_attribute ("jli_fixed", attrs))
11048    return true;
11049
11050  return TARGET_JLI_ALWAYS;
11051}
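
/* Usage sketch (illustrative; "foo" is a made-up function):

     int foo (int) __attribute__ ((jli_always));

   makes calls to foo go through the JLI table, while
   __attribute__ ((jli_fixed (N))) additionally fixes the JLI table
   entry that is used.  */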
11052
/* Handle the "jli" attribute; arguments as in struct
   attribute_spec.handler.  */
11055
11056static tree
11057arc_handle_jli_attribute (tree *node ATTRIBUTE_UNUSED,
11058			  tree name, tree args, int,
11059			  bool *no_add_attrs)
11060{
11061  if (!TARGET_V2)
11062    {
11063      warning (OPT_Wattributes,
11064	       "%qE attribute only valid for ARCv2 architecture",
11065	       name);
11066      *no_add_attrs = true;
11067    }
11068
11069  if (args == NULL_TREE)
11070    {
11071      warning (OPT_Wattributes,
11072	       "argument of %qE attribute is missing",
11073	       name);
11074      *no_add_attrs = true;
11075    }
11076  else
11077    {
11078      if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
11079	TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
11080      tree arg = TREE_VALUE (args);
11081      if (TREE_CODE (arg) != INTEGER_CST)
11082	{
11083	  warning (0, "%qE attribute allows only an integer constant argument",
11084		   name);
11085	  *no_add_attrs = true;
11086	}
11087      /* FIXME! add range check.  TREE_INT_CST_LOW (arg) */
11088    }
11089   return NULL_TREE;
11090}
11091
/* Handle the "secure_call" attribute; arguments as in struct
   attribute_spec.handler.  */
11094
11095static tree
11096arc_handle_secure_attribute (tree *node ATTRIBUTE_UNUSED,
11097			  tree name, tree args, int,
11098			  bool *no_add_attrs)
11099{
11100  if (!TARGET_EM)
11101    {
11102      warning (OPT_Wattributes,
11103	       "%qE attribute only valid for ARC EM architecture",
11104	       name);
11105      *no_add_attrs = true;
11106    }
11107
11108  if (args == NULL_TREE)
11109    {
11110      warning (OPT_Wattributes,
11111	       "argument of %qE attribute is missing",
11112	       name);
11113      *no_add_attrs = true;
11114    }
11115  else
11116    {
11117      if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
11118	TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
11119      tree arg = TREE_VALUE (args);
11120      if (TREE_CODE (arg) != INTEGER_CST)
11121	{
11122	  warning (0, "%qE attribute allows only an integer constant argument",
11123		   name);
11124	  *no_add_attrs = true;
11125	}
11126    }
11127   return NULL_TREE;
11128}
11129
11130/* Return nonzero if the symbol is a secure function.  */
11131
11132bool
11133arc_is_secure_call_p (rtx pat)
11134{
11135  tree attrs;
11136  tree decl = SYMBOL_REF_DECL (pat);
11137
11138  if (!decl)
11139    return false;
11140
11141  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
11142  if (lookup_attribute ("secure_call", attrs))
11143    return true;
11144
11145  return false;
11146}
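
/* Usage sketch (illustrative; "bar" and the index 2 are made up):

     int bar (int) __attribute__ ((secure_call (2)));

   marks bar as a secure service, so calls to it are expected to go
   through the secure call (SJLI) mechanism instead of a plain
   branch-and-link.  */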
11147
11148/* Handle "uncached" qualifier.  */
11149
11150static tree
11151arc_handle_uncached_attribute (tree *node,
11152			       tree name, tree args,
11153			       int flags ATTRIBUTE_UNUSED,
11154			       bool *no_add_attrs)
11155{
11156  if (DECL_P (*node) && TREE_CODE (*node) != TYPE_DECL)
11157    {
11158      error ("%qE attribute only applies to types",
11159	     name);
11160      *no_add_attrs = true;
11161    }
11162  else if (args)
11163    {
11164      warning (OPT_Wattributes, "argument of %qE attribute ignored", name);
11165    }
11166  return NULL_TREE;
11167}
11168
/* Return TRUE if PAT is a memory access to uncached data.  */
11170
11171bool
11172arc_is_uncached_mem_p (rtx pat)
11173{
11174  tree attrs = NULL_TREE;
11175  tree addr;
11176
11177  if (!MEM_P (pat))
11178    return false;
11179
11180  /* Get the memory attributes.  */
11181  addr = MEM_EXPR (pat);
11182  if (!addr)
11183    return false;
11184
11185  /* Get the attributes.  */
11186  if (TREE_CODE (addr) == MEM_REF
11187      || TREE_CODE (addr) == VAR_DECL)
11188    {
11189      attrs = TYPE_ATTRIBUTES (TREE_TYPE (addr));
11190      if (lookup_attribute ("uncached", attrs))
11191	return true;
11192    }
11193  if (TREE_CODE (addr) == MEM_REF)
11194    {
11195      attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
11196      if (lookup_attribute ("uncached", attrs))
11197	return true;
11198      attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 1)));
11199      if (lookup_attribute ("uncached", attrs))
11200	return true;
11201    }
11202
11203  /* Check the definitions of the structs.  */
11204  while (handled_component_p (addr))
11205    {
11206      if (TREE_CODE (addr) == COMPONENT_REF)
11207	{
11208	  attrs = TYPE_ATTRIBUTES (TREE_TYPE (addr));
11209	  if (lookup_attribute ("uncached", attrs))
11210	    return true;
11211	  attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
11212	  if (lookup_attribute ("uncached", attrs))
11213	    return true;
11214	  attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 1)));
11215	  if (lookup_attribute ("uncached", attrs))
11216	    return true;
11217	}
11218      addr = TREE_OPERAND (addr, 0);
11219    }
11220  return false;
11221}
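
/* Usage sketch (illustrative; the type, variable and address are
   made up):

     struct dev_regs { unsigned ctrl; } __attribute__ ((uncached));
     volatile struct dev_regs *regs = (struct dev_regs *) 0x40000000;

   Loads and stores through REGS are then meant to bypass the data
   cache, using the .di variants of the memory instructions, and are
   recognized as uncached by the function above.  */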
11222
/* Handle the "aux" attribute.  The auxiliary registers are addressed
   using the special lr and sr instructions.  The attribute 'aux'
   indicates whether a variable refers to the aux-regs and which
   register number is desired.  */
11227
11228static tree
11229arc_handle_aux_attribute (tree *node,
11230			  tree name, tree args, int,
11231			  bool *no_add_attrs)
11232{
11233  /* Isn't it better to use address spaces for the aux-regs?  */
11234  if (DECL_P (*node))
11235    {
11236      if (TREE_CODE (*node) != VAR_DECL)
11237	{
11238	  error ("%qE attribute only applies to variables",  name);
11239	  *no_add_attrs = true;
11240	}
11241      else if (args)
11242	{
11243	  if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
11244	    TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
11245	  tree arg = TREE_VALUE (args);
11246	  if (TREE_CODE (arg) != INTEGER_CST)
11247	    {
11248	      warning (OPT_Wattributes, "%qE attribute allows only an integer "
11249		       "constant argument", name);
11250	      *no_add_attrs = true;
11251	    }
11252	  /* FIXME! add range check.  TREE_INT_CST_LOW (arg) */
11253	}
11254
11255      if (TREE_CODE (*node) == VAR_DECL)
11256	{
11257	  tree fntype = TREE_TYPE (*node);
11258	  if (fntype && TREE_CODE (fntype) == POINTER_TYPE)
11259	    {
11260	      tree attrs = tree_cons (get_identifier ("aux"), NULL_TREE,
11261				      TYPE_ATTRIBUTES (fntype));
11262	      TYPE_ATTRIBUTES (fntype) = attrs;
11263	    }
11264	}
11265    }
11266  return NULL_TREE;
11267}
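
/* Usage sketch (illustrative; 0x1000 is an arbitrary aux register
   number):

     volatile int timer_count __attribute__ ((aux (0x1000)));

   Reads and writes of TIMER_COUNT are then performed with the lr
   and sr instructions on auxiliary register 0x1000.  */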
11268
/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P.  We don't want to use
   anchors for small data: the GP register acts as an anchor in that
   case.  We also don't want to use them for PC-relative accesses,
   where the PC acts as an anchor.  Also prohibit TLS symbols from
   using anchors.  */
11274
11275static bool
11276arc_use_anchors_for_symbol_p (const_rtx symbol)
11277{
11278  if (SYMBOL_REF_TLS_MODEL (symbol))
11279    return false;
11280
11281  if (flag_pic)
11282    return false;
11283
11284  if (SYMBOL_REF_SMALL_P (symbol))
11285    return false;
11286
11287  return default_use_anchors_for_symbol_p (symbol);
11288}
11289
11290/* Return true if SUBST can't safely replace its equivalent during RA.  */
11291static bool
11292arc_cannot_substitute_mem_equiv_p (rtx)
11293{
  /* If SUBST is mem[base+index], the address may not be representable
     in the ISA, thus return true.  */
11296  return true;
11297}
11298
/* Check whether the operands are valid for use in an LDD/STD
   instruction.  Assumes that RT and RT2 are REG.  This is guaranteed
   by the patterns.  Assumes that the address in the base register RN
   is word aligned.  The pattern guarantees that both memory accesses
   use the same base register, that the offsets are constants within
   range, and that the gap between the offsets is 4.  If reload has
   completed, check that the registers are legal.  */
11306
11307static bool
11308operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)
11309{
11310  unsigned int t, t2;
11311
11312  if (!reload_completed)
11313    return true;
11314
11315  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),
11316			 (offset & (GET_MODE_SIZE (DImode) - 1) & 3
11317			  ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))
11318    return false;
11319
11320  t = REGNO (rt);
11321  t2 = REGNO (rt2);
11322
11323  if ((t2 == PCL_REG)
11324      || (t % 2 != 0)	/* First destination register is not even.  */
11325      || (t2 != t + 1))
    return false;
11327
11328  return true;
11329}
11330
11331/* Helper for gen_operands_ldd_std.  Returns true iff the memory
11332   operand MEM's address contains an immediate offset from the base
11333   register and has no side effects, in which case it sets BASE and
11334   OFFSET accordingly.  */
11335
11336static bool
11337mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
11338{
11339  rtx addr;
11340
11341  gcc_assert (base != NULL && offset != NULL);
11342
11343  /* TODO: Handle more general memory operand patterns, such as
11344     PRE_DEC and PRE_INC.  */
11345
11346  if (side_effects_p (mem))
11347    return false;
11348
11349  /* Can't deal with subregs.  */
11350  if (GET_CODE (mem) == SUBREG)
11351    return false;
11352
11353  gcc_assert (MEM_P (mem));
11354
11355  *offset = const0_rtx;
11356
11357  addr = XEXP (mem, 0);
11358
11359  /* If addr isn't valid for DImode, then we can't handle it.  */
11360  if (!arc_legitimate_address_p (DImode, addr,
11361				reload_in_progress || reload_completed))
11362    return false;
11363
11364  if (REG_P (addr))
11365    {
11366      *base = addr;
11367      return true;
11368    }
11369  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
11370    {
11371      *base = XEXP (addr, 0);
11372      *offset = XEXP (addr, 1);
11373      return (REG_P (*base) && CONST_INT_P (*offset));
11374    }
11375
11376  return false;
11377}
11378
11379/* Called from peephole2 to replace two word-size accesses with a
11380   single LDD/STD instruction.  Returns true iff we can generate a new
11381   instruction sequence.  That is, both accesses use the same base
11382   register and the gap between constant offsets is 4.  OPERANDS are
11383   the operands found by the peephole matcher; OPERANDS[0,1] are
11384   register operands, and OPERANDS[2,3] are the corresponding memory
11385   operands.  LOAD indicates whether the access is load or store.  */
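
/* For instance (illustrative), a pair of loads

     ld r0,[r4,8]
     ld r1,[r4,12]

   uses the same base register, offsets 4 apart, an even first
   destination register and consecutive destination registers, so it
   can be replaced by a single 64-bit LDD of the pair r0:r1 from
   [r4,8].  */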
11386
11387bool
11388gen_operands_ldd_std (rtx *operands, bool load, bool commute)
11389{
11390  int i, gap;
11391  HOST_WIDE_INT offsets[2], offset;
11392  int nops = 2;
11393  rtx cur_base, cur_offset, tmp;
11394  rtx base = NULL_RTX;
11395
11396  /* Check that the memory references are immediate offsets from the
11397     same base register.  Extract the base register, the destination
11398     registers, and the corresponding memory offsets.  */
11399  for (i = 0; i < nops; i++)
11400    {
11401      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
11402	return false;
11403
11404      if (i == 0)
11405	base = cur_base;
11406      else if (REGNO (base) != REGNO (cur_base))
11407	return false;
11408
11409      offsets[i] = INTVAL (cur_offset);
11410      if (GET_CODE (operands[i]) == SUBREG)
11411	{
11412	  tmp = SUBREG_REG (operands[i]);
11413	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
11414	  operands[i] = tmp;
11415	}
11416    }
11417
11418  /* Make sure there is no dependency between the individual loads.  */
11419  if (load && REGNO (operands[0]) == REGNO (base))
11420    return false; /* RAW.  */
11421
11422  if (load && REGNO (operands[0]) == REGNO (operands[1]))
11423    return false; /* WAW.  */
11424
11425  /* Make sure the instructions are ordered with lower memory access first.  */
11426  if (offsets[0] > offsets[1])
11427    {
11428      gap = offsets[0] - offsets[1];
11429      offset = offsets[1];
11430
11431      /* Swap the instructions such that lower memory is accessed first.  */
11432      std::swap (operands[0], operands[1]);
11433      std::swap (operands[2], operands[3]);
11434    }
11435  else
11436    {
11437      gap = offsets[1] - offsets[0];
11438      offset = offsets[0];
11439    }
11440
11441  /* Make sure accesses are to consecutive memory locations.  */
11442  if (gap != 4)
11443    return false;
11444
11445  /* Make sure we generate legal instructions.  */
11446  if (operands_ok_ldd_std (operands[0], operands[1], offset))
11447    return true;
11448
11449  if (load && commute)
11450    {
11451      /* Try reordering registers.  */
11452      std::swap (operands[0], operands[1]);
11453      if (operands_ok_ldd_std (operands[0], operands[1], offset))
11454	return true;
11455    }
11456
11457  return false;
11458}
11459
/* This order of allocation is used when we compile for size.  It
   allocates first the registers which are most likely to end up in
   a short instruction.  */
11463static const int size_alloc_order[] =
11464{
11465 0, 1, 2, 3, 12, 13, 14, 15,
11466 4, 5, 6, 7, 8, 9, 10, 11
11467};
11468
11469/* Adjust register allocation order when compiling for size.  */
11470void
11471arc_adjust_reg_alloc_order (void)
11472{
11473  const int arc_default_alloc_order[] = REG_ALLOC_ORDER;
11474  memcpy (reg_alloc_order, arc_default_alloc_order, sizeof (reg_alloc_order));
11475  if (optimize_size)
11476    memcpy (reg_alloc_order, size_alloc_order, sizeof (size_alloc_order));
11477}
11478
11479/* Implement TARGET_MEMORY_MOVE_COST.  */
11480
11481static int
11482arc_memory_move_cost (machine_mode mode,
11483		      reg_class_t rclass ATTRIBUTE_UNUSED,
11484		      bool in ATTRIBUTE_UNUSED)
11485{
11486  if ((GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
11487      || ((GET_MODE_SIZE (mode) <= UNITS_PER_WORD * 2) && TARGET_LL64))
11488    return 6;
11489
11490  return (2 * GET_MODE_SIZE (mode));
11491}
11492
/* Split an OR instruction into multiple BSET/OR instructions in an
   attempt to avoid long immediate constants.  The following strategies
   are employed when the destination is a 'q' reg.

   1. if there are up to three bits set in the mask, a succession of
   up to three BSET instructions will be emitted:
   OR rA, rB, mask ->
   BSET(_S) rA,rB,mask1/BSET_S rA,rA,mask2/BSET_S rA,rA,mask3

   2. if some of the lower 6 bits of the mask are set and only one
   bit is set in the remaining upper bits, then one OR and one BSET
   instruction will be emitted:
   OR rA, rB, mask -> OR rA,rB,mask1/BSET_S rA,mask2

   3. otherwise an OR with limm will be emitted.  */
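
/* Worked example (illustrative): for mask = 0x01000023 the lower six
   bits 0x23 are emitted first as a plain OR with a u6 immediate,
   leaving 0x01000000, whose single remaining bit is then set with
   one BSET:
   OR rA,rB,0x23 / BSET_S rA,rA,24  */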
11508
11509void
11510arc_split_ior (rtx *operands)
11511{
11512  unsigned HOST_WIDE_INT mask, maskx;
11513  rtx op1 = operands[1];
11514
11515  gcc_assert (CONST_INT_P (operands[2]));
11516  mask =  INTVAL (operands[2]) & 0xffffffff;
11517
11518  if (__builtin_popcount (mask) > 3 || (mask & 0x3f))
11519    {
11520      maskx = mask & 0x3f;
11521      emit_insn (gen_rtx_SET (operands[0],
11522			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11523      op1 = operands[0];
11524      mask &= ~maskx;
11525    }
11526
11527  switch (__builtin_popcount (mask))
11528    {
11529    case 3:
11530      maskx = 1 << (__builtin_ffs (mask) - 1);
11531      emit_insn (gen_rtx_SET (operands[0],
11532			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11533      mask &= ~maskx;
11534      op1 = operands[0];
11535      /* FALLTHRU */
11536    case 2:
11537      maskx = 1 << (__builtin_ffs (mask) - 1);
11538      emit_insn (gen_rtx_SET (operands[0],
11539			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11540      mask &= ~maskx;
11541      op1 = operands[0];
11542      /* FALLTHRU */
11543    case 1:
11544      maskx = 1 << (__builtin_ffs (mask) - 1);
11545      emit_insn (gen_rtx_SET (operands[0],
11546			      gen_rtx_IOR (SImode, op1, GEN_INT (maskx))));
11547      break;
11548    case 0:
11549      break;
11550    default:
11551      gcc_unreachable ();
11552    }
11553}
11554
11555/* Helper to check C0x constraint.  */
11556
11557bool
11558arc_check_ior_const (HOST_WIDE_INT ival)
11559{
11560  unsigned int mask = (unsigned int) (ival & 0xffffffff);
11561
11562  if (UNSIGNED_INT6 (ival)
11563      || IS_POWEROF2_P (mask))
11564    return false;
11565  if (__builtin_popcount (mask) <= 3)
11566    return true;
11567  if (__builtin_popcount (mask & ~0x3f) <= 1)
11568    return true;
11569  return false;
11570}
11571
/* Split a mov with a long immediate into smaller, more size-friendly
   instructions.  */
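
/* Worked example for case 5 below (illustrative): for
   ival = 0x80000003 only bit 31 and the two low bits are set, so
   shimm = (ival * 2 + 1) & 0x3f = 7 and the split emits a single
   one-bit rotate of the u6 value 7 (ror), which yields 0x80000003
   again.  */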
11574
11575bool
11576arc_split_mov_const (rtx *operands)
11577{
11578  unsigned HOST_WIDE_INT ival;
11579  HOST_WIDE_INT shimm;
11580  machine_mode mode = GET_MODE (operands[0]);
11581
11582  /* Manage a constant.  */
11583  gcc_assert (CONST_INT_P (operands[1]));
11584  ival = INTVAL (operands[1]) & 0xffffffff;
11585
  /* 1. Check if we can just rotate right by 8 to fit into the u6 of ROR8.  */
11587  if (TARGET_BARREL_SHIFTER && TARGET_V2
11588      && ((ival & ~0x3f000000) == 0))
11589    {
11590      shimm = (ival >> 24) & 0x3f;
11591      emit_insn (gen_rtx_SET (operands[0],
11592			      gen_rtx_ROTATERT (mode, GEN_INT (shimm),
11593						GEN_INT (8))));
11594      return true;
11595    }
11596  /* 2. Check if we can just shift by 8 to fit into the u6 of LSL8.  */
11597  if (TARGET_BARREL_SHIFTER && TARGET_V2
11598      && ((ival & ~0x3f00) == 0))
11599    {
11600      shimm = (ival >> 8) & 0x3f;
11601      emit_insn (gen_rtx_SET (operands[0],
11602			      gen_rtx_ASHIFT (mode, GEN_INT (shimm),
11603					      GEN_INT (8))));
11604      return true;
11605    }
11606
11607  /* 3. Check if we can just shift by 16 to fit into the u6 of LSL16.  */
11608  if (TARGET_BARREL_SHIFTER && TARGET_V2
11609      && ((ival & ~0x3f0000) == 0))
11610    {
11611      shimm = (ival >> 16) & 0x3f;
11612      emit_insn (gen_rtx_SET (operands[0],
11613			      gen_rtx_ASHIFT (mode, GEN_INT (shimm),
11614					      GEN_INT (16))));
11615      return true;
11616    }
11617
11618  /* 4. Check if we can do something like mov_s h,u8 / asl_s ra,h,#nb.  */
11619  if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0
11620      && TARGET_BARREL_SHIFTER)
11621    {
11622      HOST_WIDE_INT shift = __builtin_ffs (ival);
11623      shimm = (ival >> (shift - 1)) & 0xff;
11624      emit_insn (gen_rtx_SET (operands[0], GEN_INT (shimm)));
11625      emit_insn (gen_rtx_SET (operands[0],
11626			      gen_rtx_ASHIFT (mode, operands[0],
11627					      GEN_INT (shift - 1))));
11628      return true;
11629    }
11630
11631  /* 5. Check if we can just rotate the limm, useful when no barrel
11632     shifter is present.  */
11633  if ((ival & ~0x8000001f) == 0)
11634    {
11635      shimm = (ival * 2 + 1) & 0x3f;
11636      emit_insn (gen_rtx_SET (operands[0],
11637			      gen_rtx_ROTATERT (mode, GEN_INT (shimm),
11638						const1_rtx)));
11639      return true;
11640    }
11641
11642  /* 6. Check if we can do something with bmask.  */
11643  if (IS_POWEROF2_P (ival + 1))
11644    {
11645      emit_insn (gen_rtx_SET (operands[0], constm1_rtx));
11646      emit_insn (gen_rtx_SET (operands[0],
11647			      gen_rtx_AND (mode, operands[0],
11648					   GEN_INT (ival))));
11649      return true;
11650    }
11651
11652  gcc_unreachable ();
11653}
11654
11655/* Helper to check Cax constraint.  */
11656
11657bool
11658arc_check_mov_const (HOST_WIDE_INT ival)
11659{
11660  ival = ival & 0xffffffff;
11661
11662  if (SIGNED_INT12 (ival))
11663    return false;
11664
11665  if ((ival & ~0x8000001f) == 0)
11666    return true;
11667
11668  if (IS_POWEROF2_P (ival + 1))
11669    return true;
11670
  /* The next rules require a barrel shifter.  */
11672  if (!TARGET_BARREL_SHIFTER)
11673    return false;
11674
11675  if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0)
11676    return true;
11677
11678  if ((ival & ~0x3f00) == 0)
11679    return true;
11680
11681  if ((ival & ~0x3f0000) == 0)
11682    return true;
11683
11684  if ((ival & ~0x3f000000) == 0)
11685    return true;
11686
11687  return false;
11688}
11689
11690/* Return nonzero if this function is known to have a null epilogue.
11691   This allows the optimizer to omit jumps to jumps if no stack
11692   was created.  */
11693
11694bool
11695arc_can_use_return_insn (void)
11696{
11697  return (reload_completed && cfun->machine->frame_info.total_size == 0
11698	  && !ARC_INTERRUPT_P (arc_compute_function_type (cfun)));
11699}
11700
11701/* Helper for INSN_COST.
11702
11703   Per Segher Boessenkool: rtx_costs computes the cost for any rtx (an
11704   insn, a set, a set source, any random piece of one).  set_src_cost,
11705   set_rtx_cost, etc. are helper functions that use that.
11706
11707   Those functions do not work for parallels.  Also, costs are not
11708   additive like this simplified model assumes.  Also, more complex
11709   backends tend to miss many cases in their rtx_costs function.
11710
11711   Many passes that want costs want to know the cost of a full insn.  Like
11712   combine.  That's why I created insn_cost: it solves all of the above
11713   problems.  */
11714
11715static int
11716arc_insn_cost (rtx_insn *insn, bool speed)
11717{
11718  int cost;
11719  if (recog_memoized (insn) < 0)
11720    return 0;
11721
11722  /* If optimizing for size, we want the insn size.  */
11723  if (!speed)
11724    return get_attr_length (insn);
11725
11726  /* Use cost if provided.  */
11727  cost = get_attr_cost (insn);
11728  if (cost > 0)
11729    return cost;
11730
11731  /* For speed make a simple cost model: memory access is more
11732     expensive than any other instruction.  */
11733  enum attr_type type = get_attr_type (insn);
11734
11735  switch (type)
11736    {
11737    case TYPE_LOAD:
11738    case TYPE_STORE:
11739      cost = COSTS_N_INSNS (2);
11740      break;
11741
11742    default:
11743      cost = COSTS_N_INSNS (1);
11744      break;
11745    }
11746
11747  return cost;
11748}
11749
11750#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
11751#define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
11752
11753#undef TARGET_CONSTANT_ALIGNMENT
11754#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
11755
11756#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
11757#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P arc_cannot_substitute_mem_equiv_p
11758
11759#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11760#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc_asm_trampoline_template
11761
11762#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
11763#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
11764
11765#undef TARGET_REGISTER_MOVE_COST
11766#define TARGET_REGISTER_MOVE_COST arc_register_move_cost
11767
11768#undef TARGET_MEMORY_MOVE_COST
11769#define TARGET_MEMORY_MOVE_COST arc_memory_move_cost
11770
11771#undef  TARGET_INSN_COST
11772#define TARGET_INSN_COST arc_insn_cost
11773
11774struct gcc_target targetm = TARGET_INITIALIZER;
11775
11776#include "gt-arc.h"
11777