1/* Output routines for Motorola MCore processor
2   Copyright (C) 1993-2015 Free Software Foundation, Inc.
3
4   This file is part of GCC.
5
6   GCC is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published
8   by the Free Software Foundation; either version 3, or (at your
9   option) any later version.
10
11   GCC is distributed in the hope that it will be useful, but WITHOUT
12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14   License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with GCC; see the file COPYING3.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
24#include "rtl.h"
25#include "hash-set.h"
26#include "machmode.h"
27#include "vec.h"
28#include "double-int.h"
29#include "input.h"
30#include "alias.h"
31#include "symtab.h"
32#include "wide-int.h"
33#include "inchash.h"
34#include "tree.h"
35#include "fold-const.h"
36#include "stor-layout.h"
37#include "varasm.h"
38#include "stringpool.h"
39#include "calls.h"
40#include "tm_p.h"
41#include "mcore.h"
42#include "regs.h"
43#include "hard-reg-set.h"
44#include "insn-config.h"
45#include "conditions.h"
46#include "output.h"
47#include "insn-attr.h"
48#include "flags.h"
49#include "obstack.h"
50#include "hashtab.h"
51#include "function.h"
52#include "statistics.h"
53#include "real.h"
54#include "fixed-value.h"
55#include "expmed.h"
56#include "dojump.h"
57#include "explow.h"
58#include "emit-rtl.h"
59#include "stmt.h"
60#include "expr.h"
61#include "reload.h"
62#include "recog.h"
63#include "ggc.h"
64#include "diagnostic-core.h"
65#include "target.h"
66#include "target-def.h"
67#include "dominance.h"
68#include "cfg.h"
69#include "cfgrtl.h"
70#include "cfganal.h"
71#include "lcm.h"
72#include "cfgbuild.h"
73#include "cfgcleanup.h"
74#include "predict.h"
75#include "basic-block.h"
76#include "df.h"
77#include "builtins.h"
78
79/* For dumping information about frame sizes.  */
80char * mcore_current_function_name = 0;
81long   mcore_current_compilation_timestamp = 0;
82
83/* Global variables for machine-dependent things.  */
84
85/* Provides the class number of the smallest class containing
86   reg number.  */
87const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
88{
89  GENERAL_REGS,	ONLYR1_REGS,  LRW_REGS,	    LRW_REGS,
90  LRW_REGS,	LRW_REGS,     LRW_REGS,	    LRW_REGS,
91  LRW_REGS,	LRW_REGS,     LRW_REGS,	    LRW_REGS,
92  LRW_REGS,	LRW_REGS,     LRW_REGS,	    GENERAL_REGS,
93  GENERAL_REGS, C_REGS,       NO_REGS,      NO_REGS,
94};
95
96struct mcore_frame
97{
98  int arg_size;			/* Stdarg spills (bytes).  */
99  int reg_size;			/* Non-volatile reg saves (bytes).  */
100  int reg_mask;			/* Non-volatile reg saves.  */
101  int local_size;		/* Locals.  */
102  int outbound_size;		/* Arg overflow on calls out.  */
103  int pad_outbound;
104  int pad_local;
105  int pad_reg;
106  /* Describe the steps we'll use to grow it.  */
107#define	MAX_STACK_GROWS	4	/* Gives us some spare space.  */
108  int growth[MAX_STACK_GROWS];
109  int arg_offset;
110  int reg_offset;
111  int reg_growth;
112  int local_growth;
113};
114
115typedef enum
116{
117  COND_NO,
118  COND_MOV_INSN,
119  COND_CLR_INSN,
120  COND_INC_INSN,
121  COND_DEC_INSN,
122  COND_BRANCH_INSN
123}
124cond_type;
125
126static void       output_stack_adjust           (int, int);
127static int        calc_live_regs                (int *);
128static int        try_constant_tricks           (HOST_WIDE_INT, HOST_WIDE_INT *, HOST_WIDE_INT *);
129static const char *     output_inline_const     (machine_mode, rtx *);
130static void       layout_mcore_frame            (struct mcore_frame *);
131static void       mcore_setup_incoming_varargs	(cumulative_args_t, machine_mode, tree, int *, int);
132static cond_type  is_cond_candidate             (rtx);
133static rtx_insn  *emit_new_cond_insn            (rtx, int);
134static rtx_insn  *conditionalize_block          (rtx_insn *);
135static void       conditionalize_optimization   (void);
136static void       mcore_reorg                   (void);
137static rtx        handle_structs_in_regs        (machine_mode, const_tree, int);
138static void       mcore_mark_dllexport          (tree);
139static void       mcore_mark_dllimport          (tree);
140static int        mcore_dllexport_p             (tree);
141static int        mcore_dllimport_p             (tree);
142static tree       mcore_handle_naked_attribute  (tree *, tree, tree, int, bool *);
143#ifdef OBJECT_FORMAT_ELF
144static void	  mcore_asm_named_section       (const char *,
145						 unsigned int, tree);
146#endif
147static void       mcore_print_operand           (FILE *, rtx, int);
148static void       mcore_print_operand_address   (FILE *, rtx);
149static bool       mcore_print_operand_punct_valid_p (unsigned char code);
150static void       mcore_unique_section	        (tree, int);
151static void mcore_encode_section_info		(tree, rtx, int);
152static const char *mcore_strip_name_encoding	(const char *);
153static int        mcore_const_costs             (rtx, RTX_CODE);
154static int        mcore_and_cost                (rtx);
155static int        mcore_ior_cost                (rtx);
156static bool       mcore_rtx_costs		(rtx, int, int, int,
157						 int *, bool);
158static void       mcore_external_libcall	(rtx);
159static bool       mcore_return_in_memory	(const_tree, const_tree);
160static int        mcore_arg_partial_bytes       (cumulative_args_t,
161						 machine_mode,
162						 tree, bool);
163static rtx        mcore_function_arg            (cumulative_args_t,
164						 machine_mode,
165						 const_tree, bool);
166static void       mcore_function_arg_advance    (cumulative_args_t,
167						 machine_mode,
168						 const_tree, bool);
169static unsigned int mcore_function_arg_boundary (machine_mode,
170						 const_tree);
171static void       mcore_asm_trampoline_template (FILE *);
172static void       mcore_trampoline_init		(rtx, tree, rtx);
173static bool       mcore_warn_func_return        (tree);
174static void       mcore_option_override		(void);
175static bool       mcore_legitimate_constant_p   (machine_mode, rtx);
176
177/* MCore specific attributes.  */
178
179static const struct attribute_spec mcore_attribute_table[] =
180{
181  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
182       affects_type_identity } */
183  { "dllexport", 0, 0, true,  false, false, NULL, false },
184  { "dllimport", 0, 0, true,  false, false, NULL, false },
185  { "naked",     0, 0, true,  false, false, mcore_handle_naked_attribute,
186    false },
187  { NULL,        0, 0, false, false, false, NULL, false }
188};
189
190/* Initialize the GCC target structure.  */
191#undef  TARGET_ASM_EXTERNAL_LIBCALL
192#define TARGET_ASM_EXTERNAL_LIBCALL	mcore_external_libcall
193
194#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
195#undef  TARGET_MERGE_DECL_ATTRIBUTES
196#define TARGET_MERGE_DECL_ATTRIBUTES	merge_dllimport_decl_attributes
197#endif
198
199#ifdef OBJECT_FORMAT_ELF
200#undef  TARGET_ASM_UNALIGNED_HI_OP
201#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
202#undef  TARGET_ASM_UNALIGNED_SI_OP
203#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
204#endif
205
206#undef  TARGET_PRINT_OPERAND
207#define TARGET_PRINT_OPERAND		mcore_print_operand
208#undef  TARGET_PRINT_OPERAND_ADDRESS
209#define TARGET_PRINT_OPERAND_ADDRESS	mcore_print_operand_address
210#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
211#define TARGET_PRINT_OPERAND_PUNCT_VALID_P mcore_print_operand_punct_valid_p
212
213#undef  TARGET_ATTRIBUTE_TABLE
214#define TARGET_ATTRIBUTE_TABLE 		mcore_attribute_table
215#undef  TARGET_ASM_UNIQUE_SECTION
216#define TARGET_ASM_UNIQUE_SECTION 	mcore_unique_section
217#undef  TARGET_ASM_FUNCTION_RODATA_SECTION
218#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
219#undef  TARGET_ENCODE_SECTION_INFO
220#define TARGET_ENCODE_SECTION_INFO 	mcore_encode_section_info
221#undef  TARGET_STRIP_NAME_ENCODING
222#define TARGET_STRIP_NAME_ENCODING	mcore_strip_name_encoding
223#undef  TARGET_RTX_COSTS
224#define TARGET_RTX_COSTS 		mcore_rtx_costs
225#undef  TARGET_ADDRESS_COST
226#define TARGET_ADDRESS_COST 		hook_int_rtx_mode_as_bool_0
227#undef  TARGET_MACHINE_DEPENDENT_REORG
228#define TARGET_MACHINE_DEPENDENT_REORG	mcore_reorg
229
230#undef  TARGET_PROMOTE_FUNCTION_MODE
231#define TARGET_PROMOTE_FUNCTION_MODE	default_promote_function_mode_always_promote
232#undef  TARGET_PROMOTE_PROTOTYPES
233#define TARGET_PROMOTE_PROTOTYPES	hook_bool_const_tree_true
234
235#undef  TARGET_RETURN_IN_MEMORY
236#define TARGET_RETURN_IN_MEMORY		mcore_return_in_memory
237#undef  TARGET_MUST_PASS_IN_STACK
238#define TARGET_MUST_PASS_IN_STACK	must_pass_in_stack_var_size
239#undef  TARGET_PASS_BY_REFERENCE
240#define TARGET_PASS_BY_REFERENCE  hook_pass_by_reference_must_pass_in_stack
241#undef  TARGET_ARG_PARTIAL_BYTES
242#define TARGET_ARG_PARTIAL_BYTES	mcore_arg_partial_bytes
243#undef  TARGET_FUNCTION_ARG
244#define TARGET_FUNCTION_ARG		mcore_function_arg
245#undef  TARGET_FUNCTION_ARG_ADVANCE
246#define TARGET_FUNCTION_ARG_ADVANCE	mcore_function_arg_advance
247#undef  TARGET_FUNCTION_ARG_BOUNDARY
248#define TARGET_FUNCTION_ARG_BOUNDARY	mcore_function_arg_boundary
249
250#undef  TARGET_SETUP_INCOMING_VARARGS
251#define TARGET_SETUP_INCOMING_VARARGS	mcore_setup_incoming_varargs
252
253#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
254#define TARGET_ASM_TRAMPOLINE_TEMPLATE	mcore_asm_trampoline_template
255#undef  TARGET_TRAMPOLINE_INIT
256#define TARGET_TRAMPOLINE_INIT		mcore_trampoline_init
257
258#undef TARGET_OPTION_OVERRIDE
259#define TARGET_OPTION_OVERRIDE mcore_option_override
260
261#undef TARGET_LEGITIMATE_CONSTANT_P
262#define TARGET_LEGITIMATE_CONSTANT_P mcore_legitimate_constant_p
263
264#undef TARGET_WARN_FUNC_RETURN
265#define TARGET_WARN_FUNC_RETURN mcore_warn_func_return
266
267struct gcc_target targetm = TARGET_INITIALIZER;
268
269/* Adjust the stack and return the number of bytes taken to do it.  */
270static void
271output_stack_adjust (int direction, int size)
272{
273  /* If extending stack a lot, we do it incrementally.  */
274  if (direction < 0 && size > mcore_stack_increment && mcore_stack_increment > 0)
275    {
276      rtx tmp = gen_rtx_REG (SImode, 1);
277      rtx memref;
278
279      emit_insn (gen_movsi (tmp, GEN_INT (mcore_stack_increment)));
280      do
281	{
282	  emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
283	  memref = gen_rtx_MEM (SImode, stack_pointer_rtx);
284	  MEM_VOLATILE_P (memref) = 1;
285	  emit_insn (gen_movsi (memref, stack_pointer_rtx));
286	  size -= mcore_stack_increment;
287	}
288      while (size > mcore_stack_increment);
289
290      /* SIZE is now the residual for the last adjustment,
291	 which doesn't require a probe.  */
292    }
293
294  if (size)
295    {
296      rtx insn;
297      rtx val = GEN_INT (size);
298
299      if (size > 32)
300	{
301	  rtx nval = gen_rtx_REG (SImode, 1);
302	  emit_insn (gen_movsi (nval, val));
303	  val = nval;
304	}
305
306      if (direction > 0)
307	insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
308      else
309	insn = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
310
311      emit_insn (insn);
312    }
313}
314
315/* Work out the registers which need to be saved,
316   both as a mask and a count.  */
317
318static int
319calc_live_regs (int * count)
320{
321  int reg;
322  int live_regs_mask = 0;
323
324  * count = 0;
325
326  for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
327    {
328      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
329	{
330	  (*count)++;
331	  live_regs_mask |= (1 << reg);
332	}
333    }
334
335  return live_regs_mask;
336}
337
338/* Print the operand address in x to the stream.  */
339
340static void
341mcore_print_operand_address (FILE * stream, rtx x)
342{
343  switch (GET_CODE (x))
344    {
345    case REG:
346      fprintf (stream, "(%s)", reg_names[REGNO (x)]);
347      break;
348
349    case PLUS:
350      {
351	rtx base = XEXP (x, 0);
352	rtx index = XEXP (x, 1);
353
354	if (GET_CODE (base) != REG)
355	  {
356	    /* Ensure that BASE is a register (one of them must be).  */
357	    rtx temp = base;
358	    base = index;
359	    index = temp;
360	  }
361
362	switch (GET_CODE (index))
363	  {
364	  case CONST_INT:
365	    fprintf (stream, "(%s," HOST_WIDE_INT_PRINT_DEC ")",
366		     reg_names[REGNO(base)], INTVAL (index));
367	    break;
368
369	  default:
370	    gcc_unreachable ();
371	  }
372      }
373
374      break;
375
376    default:
377      output_addr_const (stream, x);
378      break;
379    }
380}
381
382static bool
383mcore_print_operand_punct_valid_p (unsigned char code)
384{
385  return (code == '.' || code == '#' || code == '*' || code == '^'
386	  || code == '!');
387}
388
389/* Print operand x (an rtx) in assembler syntax to file stream
390   according to modifier code.
391
392   'R'  print the next register or memory location along, i.e. the lsw in
393        a double word value
394   'O'  print a constant without the #
395   'M'  print a constant as its negative
396   'P'  print log2 of a power of two
397   'Q'  print log2 of an inverse of a power of two
398   'U'  print register for ldm/stm instruction
399   'X'  print byte number for xtrbN instruction.  */
400
401static void
402mcore_print_operand (FILE * stream, rtx x, int code)
403{
404  switch (code)
405    {
406    case 'N':
407      if (INTVAL(x) == -1)
408	fprintf (asm_out_file, "32");
409      else
410	fprintf (asm_out_file, "%d", exact_log2 (INTVAL (x) + 1));
411      break;
412    case 'P':
413      fprintf (asm_out_file, "%d", exact_log2 (INTVAL (x) & 0xffffffff));
414      break;
415    case 'Q':
416      fprintf (asm_out_file, "%d", exact_log2 (~INTVAL (x)));
417      break;
418    case 'O':
419      fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
420      break;
421    case 'M':
422      fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, - INTVAL (x));
423      break;
424    case 'R':
425      /* Next location along in memory or register.  */
426      switch (GET_CODE (x))
427	{
428	case REG:
429	  fputs (reg_names[REGNO (x) + 1], (stream));
430	  break;
431	case MEM:
432	  mcore_print_operand_address
433	    (stream, XEXP (adjust_address (x, SImode, 4), 0));
434	  break;
435	default:
436	  gcc_unreachable ();
437	}
438      break;
439    case 'U':
440      fprintf (asm_out_file, "%s-%s", reg_names[REGNO (x)],
441	       reg_names[REGNO (x) + 3]);
442      break;
443    case 'x':
444      fprintf (asm_out_file, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
445      break;
446    case 'X':
447      fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, 3 - INTVAL (x) / 8);
448      break;
449
450    default:
451      switch (GET_CODE (x))
452	{
453	case REG:
454	  fputs (reg_names[REGNO (x)], (stream));
455	  break;
456	case MEM:
457	  output_address (XEXP (x, 0));
458	  break;
459	default:
460	  output_addr_const (stream, x);
461	  break;
462	}
463      break;
464    }
465}
466
467/* What does a constant cost ?  */
468
469static int
470mcore_const_costs (rtx exp, enum rtx_code code)
471{
472  HOST_WIDE_INT val = INTVAL (exp);
473
474  /* Easy constants.  */
475  if (   CONST_OK_FOR_I (val)
476      || CONST_OK_FOR_M (val)
477      || CONST_OK_FOR_N (val)
478      || (code == PLUS && CONST_OK_FOR_L (val)))
479    return 1;
480  else if (code == AND
481	   && (   CONST_OK_FOR_M (~val)
482	       || CONST_OK_FOR_N (~val)))
483    return 2;
484  else if (code == PLUS
485	   && (   CONST_OK_FOR_I (-val)
486	       || CONST_OK_FOR_M (-val)
487	       || CONST_OK_FOR_N (-val)))
488    return 2;
489
490  return 5;
491}
492
493/* What does an and instruction cost - we do this b/c immediates may
494   have been relaxed.   We want to ensure that cse will cse relaxed immeds
495   out.  Otherwise we'll get bad code (multiple reloads of the same const).  */
496
497static int
498mcore_and_cost (rtx x)
499{
500  HOST_WIDE_INT val;
501
502  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
503    return 2;
504
505  val = INTVAL (XEXP (x, 1));
506
507  /* Do it directly.  */
508  if (CONST_OK_FOR_K (val) || CONST_OK_FOR_M (~val))
509    return 2;
510  /* Takes one instruction to load.  */
511  else if (const_ok_for_mcore (val))
512    return 3;
513  /* Takes two instructions to load.  */
514  else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val))
515    return 4;
516
517  /* Takes a lrw to load.  */
518  return 5;
519}
520
521/* What does an or cost - see and_cost().  */
522
523static int
524mcore_ior_cost (rtx x)
525{
526  HOST_WIDE_INT val;
527
528  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
529    return 2;
530
531  val = INTVAL (XEXP (x, 1));
532
533  /* Do it directly with bclri.  */
534  if (CONST_OK_FOR_M (val))
535    return 2;
536  /* Takes one instruction to load.  */
537  else if (const_ok_for_mcore (val))
538    return 3;
539  /* Takes two instructions to load.  */
540  else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val))
541    return 4;
542
543  /* Takes a lrw to load.  */
544  return 5;
545}
546
547static bool
548mcore_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
549		 int * total, bool speed ATTRIBUTE_UNUSED)
550{
551  switch (code)
552    {
553    case CONST_INT:
554      *total = mcore_const_costs (x, (enum rtx_code) outer_code);
555      return true;
556    case CONST:
557    case LABEL_REF:
558    case SYMBOL_REF:
559      *total = 5;
560      return true;
561    case CONST_DOUBLE:
562      *total = 10;
563      return true;
564
565    case AND:
566      *total = COSTS_N_INSNS (mcore_and_cost (x));
567      return true;
568
569    case IOR:
570      *total = COSTS_N_INSNS (mcore_ior_cost (x));
571      return true;
572
573    case DIV:
574    case UDIV:
575    case MOD:
576    case UMOD:
577    case FLOAT:
578    case FIX:
579      *total = COSTS_N_INSNS (100);
580      return true;
581
582    default:
583      return false;
584    }
585}
586
587/* Prepare the operands for a comparison.  Return whether the branch/setcc
588   should reverse the operands.  */
589
590bool
591mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
592{
593  rtx cc_reg = gen_rtx_REG (CCmode, CC_REG);
594  bool invert;
595
596  if (GET_CODE (op1) == CONST_INT)
597    {
598      HOST_WIDE_INT val = INTVAL (op1);
599
600      switch (code)
601	{
602	case GTU:
603	  /* Unsigned > 0 is the same as != 0; everything else is converted
604	     below to LEU (reversed cmphs).  */
605	  if (val == 0)
606	    code = NE;
607	  break;
608
609        /* Check whether (LE A imm) can become (LT A imm + 1),
610	   or (GT A imm) can become (GE A imm + 1).  */
611	case GT:
612	case LE:
613	  if (CONST_OK_FOR_J (val + 1))
614	    {
615	      op1 = GEN_INT (val + 1);
616	      code = code == LE ? LT : GE;
617	    }
618	  break;
619
620	default:
621	  break;
622	}
623    }
624
625  if (CONSTANT_P (op1) && GET_CODE (op1) != CONST_INT)
626    op1 = force_reg (SImode, op1);
627
628  /* cmpnei: 0-31 (K immediate)
629     cmplti: 1-32 (J immediate, 0 using btsti x,31).  */
630  invert = false;
631  switch (code)
632    {
633    case EQ:	/* Use inverted condition, cmpne.  */
634      code = NE;
635      invert = true;
636      /* Drop through.  */
637
638    case NE:	/* Use normal condition, cmpne.  */
639      if (GET_CODE (op1) == CONST_INT && ! CONST_OK_FOR_K (INTVAL (op1)))
640	op1 = force_reg (SImode, op1);
641      break;
642
643    case LE:	/* Use inverted condition, reversed cmplt.  */
644      code = GT;
645      invert = true;
646      /* Drop through.  */
647
648    case GT:	/* Use normal condition, reversed cmplt.  */
649      if (GET_CODE (op1) == CONST_INT)
650	op1 = force_reg (SImode, op1);
651      break;
652
653    case GE:	/* Use inverted condition, cmplt.  */
654      code = LT;
655      invert = true;
656      /* Drop through.  */
657
658    case LT:	/* Use normal condition, cmplt.  */
659      if (GET_CODE (op1) == CONST_INT &&
660	  /* covered by btsti x,31.  */
661	  INTVAL (op1) != 0 &&
662	  ! CONST_OK_FOR_J (INTVAL (op1)))
663	op1 = force_reg (SImode, op1);
664      break;
665
666    case GTU:	/* Use inverted condition, cmple.  */
667      /* We coped with unsigned > 0 above.  */
668      gcc_assert (GET_CODE (op1) != CONST_INT || INTVAL (op1) != 0);
669      code = LEU;
670      invert = true;
671      /* Drop through.  */
672
673    case LEU:	/* Use normal condition, reversed cmphs.  */
674      if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
675	op1 = force_reg (SImode, op1);
676      break;
677
678    case LTU:	/* Use inverted condition, cmphs.  */
679      code = GEU;
680      invert = true;
681      /* Drop through.  */
682
683    case GEU:	/* Use normal condition, cmphs.  */
684      if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
685	op1 = force_reg (SImode, op1);
686      break;
687
688    default:
689      break;
690    }
691
692  emit_insn (gen_rtx_SET (VOIDmode,
693			  cc_reg,
694			  gen_rtx_fmt_ee (code, CCmode, op0, op1)));
695  return invert;
696}
697
698int
699mcore_symbolic_address_p (rtx x)
700{
701  switch (GET_CODE (x))
702    {
703    case SYMBOL_REF:
704    case LABEL_REF:
705      return 1;
706    case CONST:
707      x = XEXP (x, 0);
708      return (   (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
709	       || GET_CODE (XEXP (x, 0)) == LABEL_REF)
710	      && GET_CODE (XEXP (x, 1)) == CONST_INT);
711    default:
712      return 0;
713    }
714}
715
716/* Functions to output assembly code for a function call.  */
717
718char *
719mcore_output_call (rtx operands[], int index)
720{
721  static char buffer[20];
722  rtx addr = operands [index];
723
724  if (REG_P (addr))
725    {
726      if (TARGET_CG_DATA)
727	{
728	  gcc_assert (mcore_current_function_name);
729
730	  ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name,
731			      "unknown", 1);
732	}
733
734      sprintf (buffer, "jsr\t%%%d", index);
735    }
736  else
737    {
738      if (TARGET_CG_DATA)
739	{
740	  gcc_assert (mcore_current_function_name);
741	  gcc_assert (GET_CODE (addr) == SYMBOL_REF);
742
743	  ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name,
744			      XSTR (addr, 0), 0);
745	}
746
747      sprintf (buffer, "jbsr\t%%%d", index);
748    }
749
750  return buffer;
751}
752
753/* Can we load a constant with a single instruction ?  */
754
755int
756const_ok_for_mcore (HOST_WIDE_INT value)
757{
758  if (value >= 0 && value <= 127)
759    return 1;
760
761  /* Try exact power of two.  */
762  if (CONST_OK_FOR_M (value))
763    return 1;
764
765  /* Try exact power of two - 1.  */
766  if (CONST_OK_FOR_N (value) && value != -1)
767    return 1;
768
769  return 0;
770}
771
772/* Can we load a constant inline with up to 2 instructions ?  */
773
774int
775mcore_const_ok_for_inline (HOST_WIDE_INT value)
776{
777  HOST_WIDE_INT x, y;
778
779  return try_constant_tricks (value, & x, & y) > 0;
780}
781
782/* Are we loading the constant using a not ?  */
783
784int
785mcore_const_trick_uses_not (HOST_WIDE_INT value)
786{
787  HOST_WIDE_INT x, y;
788
789  return try_constant_tricks (value, & x, & y) == 2;
790}
791
792/* Try tricks to load a constant inline and return the trick number if
793   success (0 is non-inlinable).
794
795   0: not inlinable
796   1: single instruction (do the usual thing)
797   2: single insn followed by a 'not'
798   3: single insn followed by a subi
799   4: single insn followed by an addi
800   5: single insn followed by rsubi
801   6: single insn followed by bseti
802   7: single insn followed by bclri
803   8: single insn followed by rotli
804   9: single insn followed by lsli
805   10: single insn followed by ixh
806   11: single insn followed by ixw.  */
807
808static int
809try_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT * x, HOST_WIDE_INT * y)
810{
811  HOST_WIDE_INT i;
812  unsigned HOST_WIDE_INT bit, shf, rot;
813
814  if (const_ok_for_mcore (value))
815    return 1;	/* Do the usual thing.  */
816
817  if (! TARGET_HARDLIT)
818    return 0;
819
820  if (const_ok_for_mcore (~value))
821    {
822      *x = ~value;
823      return 2;
824    }
825
826  for (i = 1; i <= 32; i++)
827    {
828      if (const_ok_for_mcore (value - i))
829	{
830	  *x = value - i;
831	  *y = i;
832
833	  return 3;
834	}
835
836      if (const_ok_for_mcore (value + i))
837	{
838	  *x = value + i;
839	  *y = i;
840
841	  return 4;
842	}
843    }
844
845  bit = 0x80000000ULL;
846
847  for (i = 0; i <= 31; i++)
848    {
849      if (const_ok_for_mcore (i - value))
850	{
851	  *x = i - value;
852	  *y = i;
853
854	  return 5;
855	}
856
857      if (const_ok_for_mcore (value & ~bit))
858	{
859	  *y = bit;
860	  *x = value & ~bit;
861	  return 6;
862	}
863
864      if (const_ok_for_mcore (value | bit))
865	{
866	  *y = ~bit;
867	  *x = value | bit;
868
869	  return 7;
870	}
871
872      bit >>= 1;
873    }
874
875  shf = value;
876  rot = value;
877
878  for (i = 1; i < 31; i++)
879    {
880      int c;
881
882      /* MCore has rotate left.  */
883      c = rot << 31;
884      rot >>= 1;
885      rot &= 0x7FFFFFFF;
886      rot |= c;   /* Simulate rotate.  */
887
888      if (const_ok_for_mcore (rot))
889	{
890	  *y = i;
891	  *x = rot;
892
893	  return 8;
894	}
895
896      if (shf & 1)
897	shf = 0;	/* Can't use logical shift, low order bit is one.  */
898
899      shf >>= 1;
900
901      if (shf != 0 && const_ok_for_mcore (shf))
902	{
903	  *y = i;
904	  *x = shf;
905
906	  return 9;
907	}
908    }
909
910  if ((value % 3) == 0 && const_ok_for_mcore (value / 3))
911    {
912      *x = value / 3;
913
914      return 10;
915    }
916
917  if ((value % 5) == 0 && const_ok_for_mcore (value / 5))
918    {
919      *x = value / 5;
920
921      return 11;
922    }
923
924  return 0;
925}
926
927/* Check whether reg is dead at first.  This is done by searching ahead
928   for either the next use (i.e., reg is live), a death note, or a set of
929   reg.  Don't just use dead_or_set_p() since reload does not always mark
930   deaths (especially if PRESERVE_DEATH_NOTES_REGNO_P is not defined). We
931   can ignore subregs by extracting the actual register.  BRC  */
932
933int
934mcore_is_dead (rtx_insn *first, rtx reg)
935{
936  rtx_insn *insn;
937
938  /* For mcore, subregs can't live independently of their parent regs.  */
939  if (GET_CODE (reg) == SUBREG)
940    reg = SUBREG_REG (reg);
941
942  /* Dies immediately.  */
943  if (dead_or_set_p (first, reg))
944    return 1;
945
946  /* Look for conclusive evidence of live/death, otherwise we have
947     to assume that it is live.  */
948  for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn))
949    {
950      if (JUMP_P (insn))
951	return 0;	/* We lose track, assume it is alive.  */
952
953      else if (CALL_P (insn))
954	{
955	  /* Call's might use it for target or register parms.  */
956	  if (reg_referenced_p (reg, PATTERN (insn))
957	      || find_reg_fusage (insn, USE, reg))
958	    return 0;
959	  else if (dead_or_set_p (insn, reg))
960            return 1;
961	}
962      else if (NONJUMP_INSN_P (insn))
963	{
964	  if (reg_referenced_p (reg, PATTERN (insn)))
965            return 0;
966	  else if (dead_or_set_p (insn, reg))
967            return 1;
968	}
969    }
970
971  /* No conclusive evidence either way, we cannot take the chance
972     that control flow hid the use from us -- "I'm not dead yet".  */
973  return 0;
974}
975
976/* Count the number of ones in mask.  */
977
978int
979mcore_num_ones (HOST_WIDE_INT mask)
980{
981  /* A trick to count set bits recently posted on comp.compilers.  */
982  mask =  (mask >> 1  & 0x55555555) + (mask & 0x55555555);
983  mask = ((mask >> 2) & 0x33333333) + (mask & 0x33333333);
984  mask = ((mask >> 4) + mask) & 0x0f0f0f0f;
985  mask = ((mask >> 8) + mask);
986
987  return (mask + (mask >> 16)) & 0xff;
988}
989
990/* Count the number of zeros in mask.  */
991
992int
993mcore_num_zeros (HOST_WIDE_INT mask)
994{
995  return 32 - mcore_num_ones (mask);
996}
997
998/* Determine byte being masked.  */
999
1000int
1001mcore_byte_offset (unsigned int mask)
1002{
1003  if (mask == 0x00ffffffL)
1004    return 0;
1005  else if (mask == 0xff00ffffL)
1006    return 1;
1007  else if (mask == 0xffff00ffL)
1008    return 2;
1009  else if (mask == 0xffffff00L)
1010    return 3;
1011
1012  return -1;
1013}
1014
1015/* Determine halfword being masked.  */
1016
1017int
1018mcore_halfword_offset (unsigned int mask)
1019{
1020  if (mask == 0x0000ffffL)
1021    return 0;
1022  else if (mask == 0xffff0000L)
1023    return 1;
1024
1025  return -1;
1026}
1027
1028/* Output a series of bseti's corresponding to mask.  */
1029
1030const char *
1031mcore_output_bseti (rtx dst, int mask)
1032{
1033  rtx out_operands[2];
1034  int bit;
1035
1036  out_operands[0] = dst;
1037
1038  for (bit = 0; bit < 32; bit++)
1039    {
1040      if ((mask & 0x1) == 0x1)
1041	{
1042	  out_operands[1] = GEN_INT (bit);
1043
1044	  output_asm_insn ("bseti\t%0,%1", out_operands);
1045	}
1046      mask >>= 1;
1047    }
1048
1049  return "";
1050}
1051
1052/* Output a series of bclri's corresponding to mask.  */
1053
1054const char *
1055mcore_output_bclri (rtx dst, int mask)
1056{
1057  rtx out_operands[2];
1058  int bit;
1059
1060  out_operands[0] = dst;
1061
1062  for (bit = 0; bit < 32; bit++)
1063    {
1064      if ((mask & 0x1) == 0x0)
1065	{
1066	  out_operands[1] = GEN_INT (bit);
1067
1068	  output_asm_insn ("bclri\t%0,%1", out_operands);
1069	}
1070
1071      mask >>= 1;
1072    }
1073
1074  return "";
1075}
1076
1077/* Output a conditional move of two constants that are +/- 1 within each
1078   other.  See the "movtK" patterns in mcore.md.   I'm not sure this is
1079   really worth the effort.  */
1080
1081const char *
1082mcore_output_cmov (rtx operands[], int cmp_t, const char * test)
1083{
1084  HOST_WIDE_INT load_value;
1085  HOST_WIDE_INT adjust_value;
1086  rtx out_operands[4];
1087
1088  out_operands[0] = operands[0];
1089
1090  /* Check to see which constant is loadable.  */
1091  if (const_ok_for_mcore (INTVAL (operands[1])))
1092    {
1093      out_operands[1] = operands[1];
1094      out_operands[2] = operands[2];
1095    }
1096  else if (const_ok_for_mcore (INTVAL (operands[2])))
1097    {
1098      out_operands[1] = operands[2];
1099      out_operands[2] = operands[1];
1100
1101      /* Complement test since constants are swapped.  */
1102      cmp_t = (cmp_t == 0);
1103    }
1104  load_value   = INTVAL (out_operands[1]);
1105  adjust_value = INTVAL (out_operands[2]);
1106
1107  /* First output the test if folded into the pattern.  */
1108
1109  if (test)
1110    output_asm_insn (test, operands);
1111
1112  /* Load the constant - for now, only support constants that can be
1113     generated with a single instruction.  maybe add general inlinable
1114     constants later (this will increase the # of patterns since the
1115     instruction sequence has a different length attribute).  */
1116  if (load_value >= 0 && load_value <= 127)
1117    output_asm_insn ("movi\t%0,%1", out_operands);
1118  else if (CONST_OK_FOR_M (load_value))
1119    output_asm_insn ("bgeni\t%0,%P1", out_operands);
1120  else if (CONST_OK_FOR_N (load_value))
1121    output_asm_insn ("bmaski\t%0,%N1", out_operands);
1122
1123  /* Output the constant adjustment.  */
1124  if (load_value > adjust_value)
1125    {
1126      if (cmp_t)
1127	output_asm_insn ("decf\t%0", out_operands);
1128      else
1129	output_asm_insn ("dect\t%0", out_operands);
1130    }
1131  else
1132    {
1133      if (cmp_t)
1134	output_asm_insn ("incf\t%0", out_operands);
1135      else
1136	output_asm_insn ("inct\t%0", out_operands);
1137    }
1138
1139  return "";
1140}
1141
1142/* Outputs the peephole for moving a constant that gets not'ed followed
1143   by an and (i.e. combine the not and the and into andn). BRC  */
1144
1145const char *
1146mcore_output_andn (rtx insn ATTRIBUTE_UNUSED, rtx operands[])
1147{
1148  HOST_WIDE_INT x, y;
1149  rtx out_operands[3];
1150  const char * load_op;
1151  char buf[256];
1152  int trick_no;
1153
1154  trick_no = try_constant_tricks (INTVAL (operands[1]), &x, &y);
1155  gcc_assert (trick_no == 2);
1156
1157  out_operands[0] = operands[0];
1158  out_operands[1] = GEN_INT (x);
1159  out_operands[2] = operands[2];
1160
1161  if (x >= 0 && x <= 127)
1162    load_op = "movi\t%0,%1";
1163
1164  /* Try exact power of two.  */
1165  else if (CONST_OK_FOR_M (x))
1166    load_op = "bgeni\t%0,%P1";
1167
1168  /* Try exact power of two - 1.  */
1169  else if (CONST_OK_FOR_N (x))
1170    load_op = "bmaski\t%0,%N1";
1171
1172  else
1173    {
1174      load_op = "BADMOVI-andn\t%0, %1";
1175      gcc_unreachable ();
1176    }
1177
1178  sprintf (buf, "%s\n\tandn\t%%2,%%0", load_op);
1179  output_asm_insn (buf, out_operands);
1180
1181  return "";
1182}
1183
1184/* Output an inline constant.  */
1185
1186static const char *
1187output_inline_const (machine_mode mode, rtx operands[])
1188{
1189  HOST_WIDE_INT x = 0, y = 0;
1190  int trick_no;
1191  rtx out_operands[3];
1192  char buf[256];
1193  char load_op[256];
1194  const char *dst_fmt;
1195  HOST_WIDE_INT value;
1196
1197  value = INTVAL (operands[1]);
1198
1199  trick_no = try_constant_tricks (value, &x, &y);
1200  /* lrw's are handled separately: Large inlinable constants never get
1201     turned into lrw's.  Our caller uses try_constant_tricks to back
1202     off to an lrw rather than calling this routine.  */
1203  gcc_assert (trick_no != 0);
1204
1205  if (trick_no == 1)
1206    x = value;
1207
1208  /* operands: 0 = dst, 1 = load immed., 2 = immed. adjustment.  */
1209  out_operands[0] = operands[0];
1210  out_operands[1] = GEN_INT (x);
1211
1212  if (trick_no > 2)
1213    out_operands[2] = GEN_INT (y);
1214
1215  /* Select dst format based on mode.  */
1216  if (mode == DImode && (! TARGET_LITTLE_END))
1217    dst_fmt = "%R0";
1218  else
1219    dst_fmt = "%0";
1220
1221  if (x >= 0 && x <= 127)
1222    sprintf (load_op, "movi\t%s,%%1", dst_fmt);
1223
1224  /* Try exact power of two.  */
1225  else if (CONST_OK_FOR_M (x))
1226    sprintf (load_op, "bgeni\t%s,%%P1", dst_fmt);
1227
1228  /* Try exact power of two - 1.  */
1229  else if (CONST_OK_FOR_N (x))
1230    sprintf (load_op, "bmaski\t%s,%%N1", dst_fmt);
1231
1232  else
1233    {
1234      sprintf (load_op, "BADMOVI-inline_const %s, %%1", dst_fmt);
1235      gcc_unreachable ();
1236    }
1237
1238  switch (trick_no)
1239    {
1240    case 1:
1241      strcpy (buf, load_op);
1242      break;
1243    case 2:   /* not */
1244      sprintf (buf, "%s\n\tnot\t%s\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1245      break;
1246    case 3:   /* add */
1247      sprintf (buf, "%s\n\taddi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1248      break;
1249    case 4:   /* sub */
1250      sprintf (buf, "%s\n\tsubi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1251      break;
1252    case 5:   /* rsub */
1253      /* Never happens unless -mrsubi, see try_constant_tricks().  */
1254      sprintf (buf, "%s\n\trsubi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1255      break;
1256    case 6:   /* bseti */
1257      sprintf (buf, "%s\n\tbseti\t%s,%%P2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1258      break;
1259    case 7:   /* bclr */
1260      sprintf (buf, "%s\n\tbclri\t%s,%%Q2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1261      break;
1262    case 8:   /* rotl */
1263      sprintf (buf, "%s\n\trotli\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1264      break;
1265    case 9:   /* lsl */
1266      sprintf (buf, "%s\n\tlsli\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
1267      break;
1268    case 10:  /* ixh */
1269      sprintf (buf, "%s\n\tixh\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, dst_fmt, value, value);
1270      break;
1271    case 11:  /* ixw */
1272      sprintf (buf, "%s\n\tixw\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, dst_fmt, value, value);
1273      break;
1274    default:
1275      return "";
1276    }
1277
1278  output_asm_insn (buf, out_operands);
1279
1280  return "";
1281}
1282
1283/* Output a move of a word or less value.  */
1284
1285const char *
1286mcore_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1287		   machine_mode mode ATTRIBUTE_UNUSED)
1288{
1289  rtx dst = operands[0];
1290  rtx src = operands[1];
1291
1292  if (GET_CODE (dst) == REG)
1293    {
1294      if (GET_CODE (src) == REG)
1295	{
1296	  if (REGNO (src) == CC_REG)            /* r-c */
1297            return "mvc\t%0";
1298	  else
1299            return "mov\t%0,%1";                /* r-r*/
1300	}
1301      else if (GET_CODE (src) == MEM)
1302	{
1303	  if (GET_CODE (XEXP (src, 0)) == LABEL_REF)
1304            return "lrw\t%0,[%1]";              /* a-R */
1305	  else
1306	    switch (GET_MODE (src))		/* r-m */
1307	      {
1308	      case SImode:
1309		return "ldw\t%0,%1";
1310	      case HImode:
1311		return "ld.h\t%0,%1";
1312	      case QImode:
1313		return "ld.b\t%0,%1";
1314	      default:
1315		gcc_unreachable ();
1316	      }
1317	}
1318      else if (GET_CODE (src) == CONST_INT)
1319	{
1320	  HOST_WIDE_INT x, y;
1321
1322	  if (CONST_OK_FOR_I (INTVAL (src)))       /* r-I */
1323            return "movi\t%0,%1";
1324	  else if (CONST_OK_FOR_M (INTVAL (src)))  /* r-M */
1325            return "bgeni\t%0,%P1\t// %1 %x1";
1326	  else if (CONST_OK_FOR_N (INTVAL (src)))  /* r-N */
1327            return "bmaski\t%0,%N1\t// %1 %x1";
1328	  else if (try_constant_tricks (INTVAL (src), &x, &y))     /* R-P */
1329            return output_inline_const (SImode, operands);  /* 1-2 insns */
1330	  else
1331            return "lrw\t%0,%x1\t// %1";	/* Get it from literal pool.  */
1332	}
1333      else
1334	return "lrw\t%0, %1";                /* Into the literal pool.  */
1335    }
1336  else if (GET_CODE (dst) == MEM)               /* m-r */
1337    switch (GET_MODE (dst))
1338      {
1339      case SImode:
1340	return "stw\t%1,%0";
1341      case HImode:
1342	return "st.h\t%1,%0";
1343      case QImode:
1344	return "st.b\t%1,%0";
1345      default:
1346	gcc_unreachable ();
1347      }
1348
1349  gcc_unreachable ();
1350}
1351
1352/* Return a sequence of instructions to perform DI or DF move.
1353   Since the MCORE cannot move a DI or DF in one instruction, we have
1354   to take care when we see overlapping source and dest registers.  */
1355
1356const char *
1357mcore_output_movedouble (rtx operands[], machine_mode mode ATTRIBUTE_UNUSED)
1358{
1359  rtx dst = operands[0];
1360  rtx src = operands[1];
1361
1362  if (GET_CODE (dst) == REG)
1363    {
1364      if (GET_CODE (src) == REG)
1365	{
1366	  int dstreg = REGNO (dst);
1367	  int srcreg = REGNO (src);
1368
1369	  /* Ensure the second source not overwritten.  */
1370	  if (srcreg + 1 == dstreg)
1371	    return "mov	%R0,%R1\n\tmov	%0,%1";
1372	  else
1373	    return "mov	%0,%1\n\tmov	%R0,%R1";
1374	}
1375      else if (GET_CODE (src) == MEM)
1376	{
1377	  rtx memexp = XEXP (src, 0);
1378	  int dstreg = REGNO (dst);
1379	  int basereg = -1;
1380
1381	  if (GET_CODE (memexp) == LABEL_REF)
1382	    return "lrw\t%0,[%1]\n\tlrw\t%R0,[%R1]";
1383	  else if (GET_CODE (memexp) == REG)
1384	    basereg = REGNO (memexp);
1385	  else if (GET_CODE (memexp) == PLUS)
1386	    {
1387	      if (GET_CODE (XEXP (memexp, 0)) == REG)
1388		basereg = REGNO (XEXP (memexp, 0));
1389	      else if (GET_CODE (XEXP (memexp, 1)) == REG)
1390		basereg = REGNO (XEXP (memexp, 1));
1391	      else
1392		gcc_unreachable ();
1393	    }
1394	  else
1395	    gcc_unreachable ();
1396
1397          /* ??? length attribute is wrong here.  */
1398	  if (dstreg == basereg)
1399	    {
1400	      /* Just load them in reverse order.  */
1401	      return "ldw\t%R0,%R1\n\tldw\t%0,%1";
1402
1403	      /* XXX: alternative: move basereg to basereg+1
1404	         and then fall through.  */
1405	    }
1406	  else
1407	    return "ldw\t%0,%1\n\tldw\t%R0,%R1";
1408	}
1409      else if (GET_CODE (src) == CONST_INT)
1410	{
1411	  if (TARGET_LITTLE_END)
1412	    {
1413	      if (CONST_OK_FOR_I (INTVAL (src)))
1414		output_asm_insn ("movi	%0,%1", operands);
1415	      else if (CONST_OK_FOR_M (INTVAL (src)))
1416		output_asm_insn ("bgeni	%0,%P1", operands);
1417	      else if (CONST_OK_FOR_N (INTVAL (src)))
1418		output_asm_insn ("bmaski	%0,%N1", operands);
1419	      else
1420		gcc_unreachable ();
1421
1422	      if (INTVAL (src) < 0)
1423		return "bmaski	%R0,32";
1424	      else
1425		return "movi	%R0,0";
1426	    }
1427	  else
1428	    {
1429	      if (CONST_OK_FOR_I (INTVAL (src)))
1430		output_asm_insn ("movi	%R0,%1", operands);
1431	      else if (CONST_OK_FOR_M (INTVAL (src)))
1432		output_asm_insn ("bgeni	%R0,%P1", operands);
1433	      else if (CONST_OK_FOR_N (INTVAL (src)))
1434		output_asm_insn ("bmaski	%R0,%N1", operands);
1435	      else
1436		gcc_unreachable ();
1437
1438	      if (INTVAL (src) < 0)
1439		return "bmaski	%0,32";
1440	      else
1441		return "movi	%0,0";
1442	    }
1443	}
1444      else
1445	gcc_unreachable ();
1446    }
1447  else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG)
1448    return "stw\t%1,%0\n\tstw\t%R1,%R0";
1449  else
1450    gcc_unreachable ();
1451}
1452
1453/* Predicates used by the templates.  */
1454
1455int
1456mcore_arith_S_operand (rtx op)
1457{
1458  if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (~INTVAL (op)))
1459    return 1;
1460
1461  return 0;
1462}
1463
1464/* Expand insert bit field.  BRC  */
1465
1466int
1467mcore_expand_insv (rtx operands[])
1468{
1469  int width = INTVAL (operands[1]);
1470  int posn = INTVAL (operands[2]);
1471  int mask;
1472  rtx mreg, sreg, ereg;
1473
1474  /* To get width 1 insv, the test in store_bit_field() (expmed.c, line 191)
1475     for width==1 must be removed.  Look around line 368.  This is something
1476     we really want the md part to do.  */
1477  if (width == 1 && GET_CODE (operands[3]) == CONST_INT)
1478    {
1479      /* Do directly with bseti or bclri.  */
1480      /* RBE: 2/97 consider only low bit of constant.  */
1481      if ((INTVAL (operands[3]) & 1) == 0)
1482	{
1483	  mask = ~(1 << posn);
1484	  emit_insn (gen_rtx_SET (SImode, operands[0],
1485			      gen_rtx_AND (SImode, operands[0], GEN_INT (mask))));
1486	}
1487      else
1488	{
1489	  mask = 1 << posn;
1490	  emit_insn (gen_rtx_SET (SImode, operands[0],
1491			    gen_rtx_IOR (SImode, operands[0], GEN_INT (mask))));
1492	}
1493
1494      return 1;
1495    }
1496
1497  /* Look at some bit-field placements that we aren't interested
1498     in handling ourselves, unless specifically directed to do so.  */
1499  if (! TARGET_W_FIELD)
1500    return 0;		/* Generally, give up about now.  */
1501
1502  if (width == 8 && posn % 8 == 0)
1503    /* Byte sized and aligned; let caller break it up.  */
1504    return 0;
1505
1506  if (width == 16 && posn % 16 == 0)
1507    /* Short sized and aligned; let caller break it up.  */
1508    return 0;
1509
1510  /* The general case - we can do this a little bit better than what the
1511     machine independent part tries.  This will get rid of all the subregs
1512     that mess up constant folding in combine when working with relaxed
1513     immediates.  */
1514
1515  /* If setting the entire field, do it directly.  */
1516  if (GET_CODE (operands[3]) == CONST_INT
1517      && INTVAL (operands[3]) == ((1 << width) - 1))
1518    {
1519      mreg = force_reg (SImode, GEN_INT (INTVAL (operands[3]) << posn));
1520      emit_insn (gen_rtx_SET (SImode, operands[0],
1521                         gen_rtx_IOR (SImode, operands[0], mreg)));
1522      return 1;
1523    }
1524
1525  /* Generate the clear mask.  */
1526  mreg = force_reg (SImode, GEN_INT (~(((1 << width) - 1) << posn)));
1527
1528  /* Clear the field, to overlay it later with the source.  */
1529  emit_insn (gen_rtx_SET (SImode, operands[0],
1530		      gen_rtx_AND (SImode, operands[0], mreg)));
1531
1532  /* If the source is constant 0, we've nothing to add back.  */
1533  if (GET_CODE (operands[3]) == CONST_INT && INTVAL (operands[3]) == 0)
1534    return 1;
1535
1536  /* XXX: Should we worry about more games with constant values?
1537     We've covered the high profile: set/clear single-bit and many-bit
1538     fields. How often do we see "arbitrary bit pattern" constants?  */
1539  sreg = copy_to_mode_reg (SImode, operands[3]);
1540
1541  /* Extract src as same width as dst (needed for signed values).  We
1542     always have to do this since we widen everything to SImode.
1543     We don't have to mask if we're shifting this up against the
1544     MSB of the register (e.g., the shift will push out any hi-order
1545     bits.  */
1546  if (width + posn != (int) GET_MODE_SIZE (SImode))
1547    {
1548      ereg = force_reg (SImode, GEN_INT ((1 << width) - 1));
1549      emit_insn (gen_rtx_SET (SImode, sreg,
1550                          gen_rtx_AND (SImode, sreg, ereg)));
1551    }
1552
1553  /* Insert source value in dest.  */
1554  if (posn != 0)
1555    emit_insn (gen_rtx_SET (SImode, sreg,
1556		        gen_rtx_ASHIFT (SImode, sreg, GEN_INT (posn))));
1557
1558  emit_insn (gen_rtx_SET (SImode, operands[0],
1559		      gen_rtx_IOR (SImode, operands[0], sreg)));
1560
1561  return 1;
1562}
1563
1564/* ??? Block move stuff stolen from m88k.  This code has not been
1565   verified for correctness.  */
1566
1567/* Emit code to perform a block move.  Choose the best method.
1568
1569   OPERANDS[0] is the destination.
1570   OPERANDS[1] is the source.
1571   OPERANDS[2] is the size.
1572   OPERANDS[3] is the alignment safe to use.  */
1573
1574/* Emit code to perform a block move with an offset sequence of ldw/st
1575   instructions (..., ldw 0, stw 1, ldw 1, stw 0, ...).  SIZE and ALIGN are
1576   known constants.  DEST and SRC are registers.  OFFSET is the known
1577   starting point for the output pattern.  */
1578
1579static const machine_mode mode_from_align[] =
1580{
1581  VOIDmode, QImode, HImode, VOIDmode, SImode,
1582};
1583
1584static void
1585block_move_sequence (rtx dst_mem, rtx src_mem, int size, int align)
1586{
1587  rtx temp[2];
1588  machine_mode mode[2];
1589  int amount[2];
1590  bool active[2];
1591  int phase = 0;
1592  int next;
1593  int offset_ld = 0;
1594  int offset_st = 0;
1595  rtx x;
1596
1597  x = XEXP (dst_mem, 0);
1598  if (!REG_P (x))
1599    {
1600      x = force_reg (Pmode, x);
1601      dst_mem = replace_equiv_address (dst_mem, x);
1602    }
1603
1604  x = XEXP (src_mem, 0);
1605  if (!REG_P (x))
1606    {
1607      x = force_reg (Pmode, x);
1608      src_mem = replace_equiv_address (src_mem, x);
1609    }
1610
1611  active[0] = active[1] = false;
1612
1613  do
1614    {
1615      next = phase;
1616      phase ^= 1;
1617
1618      if (size > 0)
1619	{
1620	  int next_amount;
1621
1622	  next_amount = (size >= 4 ? 4 : (size >= 2 ? 2 : 1));
1623	  next_amount = MIN (next_amount, align);
1624
1625	  amount[next] = next_amount;
1626	  mode[next] = mode_from_align[next_amount];
1627	  temp[next] = gen_reg_rtx (mode[next]);
1628
1629	  x = adjust_address (src_mem, mode[next], offset_ld);
1630	  emit_insn (gen_rtx_SET (VOIDmode, temp[next], x));
1631
1632	  offset_ld += next_amount;
1633	  size -= next_amount;
1634	  active[next] = true;
1635	}
1636
1637      if (active[phase])
1638	{
1639	  active[phase] = false;
1640
1641	  x = adjust_address (dst_mem, mode[phase], offset_st);
1642	  emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase]));
1643
1644	  offset_st += amount[phase];
1645	}
1646    }
1647  while (active[next]);
1648}
1649
1650bool
1651mcore_expand_block_move (rtx *operands)
1652{
1653  HOST_WIDE_INT align, bytes, max;
1654
1655  if (GET_CODE (operands[2]) != CONST_INT)
1656    return false;
1657
1658  bytes = INTVAL (operands[2]);
1659  align = INTVAL (operands[3]);
1660
1661  if (bytes <= 0)
1662    return false;
1663  if (align > 4)
1664    align = 4;
1665
1666  switch (align)
1667    {
1668    case 4:
1669      if (bytes & 1)
1670	max = 4*4;
1671      else if (bytes & 3)
1672	max = 8*4;
1673      else
1674	max = 16*4;
1675      break;
1676    case 2:
1677      max = 4*2;
1678      break;
1679    case 1:
1680      max = 4*1;
1681      break;
1682    default:
1683      gcc_unreachable ();
1684    }
1685
1686  if (bytes <= max)
1687    {
1688      block_move_sequence (operands[0], operands[1], bytes, align);
1689      return true;
1690    }
1691
1692  return false;
1693}
1694
1695
1696/* Code to generate prologue and epilogue sequences.  */
1697static int number_of_regs_before_varargs;
1698
1699/* Set by TARGET_SETUP_INCOMING_VARARGS to indicate to prolog that this is
1700   for a varargs function.  */
1701static int current_function_anonymous_args;
1702
1703#define	STACK_BYTES (STACK_BOUNDARY/BITS_PER_UNIT)
1704#define	STORE_REACH (64)	/* Maximum displace of word store + 4.  */
1705#define	ADDI_REACH (32)		/* Maximum addi operand.  */
1706
1707static void
1708layout_mcore_frame (struct mcore_frame * infp)
1709{
1710  int n;
1711  unsigned int i;
1712  int nbytes;
1713  int regarg;
1714  int localregarg;
1715  int outbounds;
1716  unsigned int growths;
1717  int step;
1718
1719  /* Might have to spill bytes to re-assemble a big argument that
1720     was passed partially in registers and partially on the stack.  */
1721  nbytes = crtl->args.pretend_args_size;
1722
1723  /* Determine how much space for spilled anonymous args (e.g., stdarg).  */
1724  if (current_function_anonymous_args)
1725    nbytes += (NPARM_REGS - number_of_regs_before_varargs) * UNITS_PER_WORD;
1726
1727  infp->arg_size = nbytes;
1728
1729  /* How much space to save non-volatile registers we stomp.  */
1730  infp->reg_mask = calc_live_regs (& n);
1731  infp->reg_size = n * 4;
1732
1733  /* And the rest of it... locals and space for overflowed outbounds.  */
1734  infp->local_size = get_frame_size ();
1735  infp->outbound_size = crtl->outgoing_args_size;
1736
1737  /* Make sure we have a whole number of words for the locals.  */
1738  if (infp->local_size % STACK_BYTES)
1739    infp->local_size = (infp->local_size + STACK_BYTES - 1) & ~ (STACK_BYTES -1);
1740
1741  /* Only thing we know we have to pad is the outbound space, since
1742     we've aligned our locals assuming that base of locals is aligned.  */
1743  infp->pad_local = 0;
1744  infp->pad_reg = 0;
1745  infp->pad_outbound = 0;
1746  if (infp->outbound_size % STACK_BYTES)
1747    infp->pad_outbound = STACK_BYTES - (infp->outbound_size % STACK_BYTES);
1748
1749  /* Now we see how we want to stage the prologue so that it does
1750     the most appropriate stack growth and register saves to either:
1751     (1) run fast,
1752     (2) reduce instruction space, or
1753     (3) reduce stack space.  */
1754  for (i = 0; i < ARRAY_SIZE (infp->growth); i++)
1755    infp->growth[i] = 0;
1756
1757  regarg      = infp->reg_size + infp->arg_size;
1758  localregarg = infp->local_size + regarg;
1759  outbounds   = infp->outbound_size + infp->pad_outbound;
1760  growths     = 0;
1761
1762  /* XXX: Consider one where we consider localregarg + outbound too! */
1763
1764  /* Frame of <= 32 bytes and using stm would get <= 2 registers.
1765     use stw's with offsets and buy the frame in one shot.  */
1766  if (localregarg <= ADDI_REACH
1767      && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000))
1768    {
1769      /* Make sure we'll be aligned.  */
1770      if (localregarg % STACK_BYTES)
1771	infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES);
1772
1773      step = localregarg + infp->pad_reg;
1774      infp->reg_offset = infp->local_size;
1775
1776      if (outbounds + step <= ADDI_REACH && !frame_pointer_needed)
1777	{
1778	  step += outbounds;
1779	  infp->reg_offset += outbounds;
1780	  outbounds = 0;
1781	}
1782
1783      infp->arg_offset = step - 4;
1784      infp->growth[growths++] = step;
1785      infp->reg_growth = growths;
1786      infp->local_growth = growths;
1787
1788      /* If we haven't already folded it in.  */
1789      if (outbounds)
1790	infp->growth[growths++] = outbounds;
1791
1792      goto finish;
1793    }
1794
1795  /* Frame can't be done with a single subi, but can be done with 2
1796     insns.  If the 'stm' is getting <= 2 registers, we use stw's and
1797     shift some of the stack purchase into the first subi, so both are
1798     single instructions.  */
1799  if (localregarg <= STORE_REACH
1800      && (infp->local_size > ADDI_REACH)
1801      && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000))
1802    {
1803      int all;
1804
1805      /* Make sure we'll be aligned; use either pad_reg or pad_local.  */
1806      if (localregarg % STACK_BYTES)
1807	infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES);
1808
1809      all = localregarg + infp->pad_reg + infp->pad_local;
1810      step = ADDI_REACH;	/* As much up front as we can.  */
1811      if (step > all)
1812	step = all;
1813
1814      /* XXX: Consider whether step will still be aligned; we believe so.  */
1815      infp->arg_offset = step - 4;
1816      infp->growth[growths++] = step;
1817      infp->reg_growth = growths;
1818      infp->reg_offset = step - infp->pad_reg - infp->reg_size;
1819      all -= step;
1820
1821      /* Can we fold in any space required for outbounds?  */
1822      if (outbounds + all <= ADDI_REACH && !frame_pointer_needed)
1823	{
1824	  all += outbounds;
1825	  outbounds = 0;
1826	}
1827
1828      /* Get the rest of the locals in place.  */
1829      step = all;
1830      infp->growth[growths++] = step;
1831      infp->local_growth = growths;
1832      all -= step;
1833
1834      gcc_assert (all == 0);
1835
1836      /* Finish off if we need to do so.  */
1837      if (outbounds)
1838	infp->growth[growths++] = outbounds;
1839
1840      goto finish;
1841    }
1842
1843  /* Registers + args is nicely aligned, so we'll buy that in one shot.
1844     Then we buy the rest of the frame in 1 or 2 steps depending on
1845     whether we need a frame pointer.  */
1846  if ((regarg % STACK_BYTES) == 0)
1847    {
1848      infp->growth[growths++] = regarg;
1849      infp->reg_growth = growths;
1850      infp->arg_offset = regarg - 4;
1851      infp->reg_offset = 0;
1852
1853      if (infp->local_size % STACK_BYTES)
1854	infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES);
1855
1856      step = infp->local_size + infp->pad_local;
1857
1858      if (!frame_pointer_needed)
1859	{
1860	  step += outbounds;
1861	  outbounds = 0;
1862	}
1863
1864      infp->growth[growths++] = step;
1865      infp->local_growth = growths;
1866
1867      /* If there's any left to be done.  */
1868      if (outbounds)
1869	infp->growth[growths++] = outbounds;
1870
1871      goto finish;
1872    }
1873
1874  /* XXX: optimizations that we'll want to play with....
1875     -- regarg is not aligned, but it's a small number of registers;
1876    	use some of localsize so that regarg is aligned and then
1877    	save the registers.  */
1878
1879  /* Simple encoding; plods down the stack buying the pieces as it goes.
1880     -- does not optimize space consumption.
1881     -- does not attempt to optimize instruction counts.
1882     -- but it is safe for all alignments.  */
1883  if (regarg % STACK_BYTES != 0)
1884    infp->pad_reg = STACK_BYTES - (regarg % STACK_BYTES);
1885
1886  infp->growth[growths++] = infp->arg_size + infp->reg_size + infp->pad_reg;
1887  infp->reg_growth = growths;
1888  infp->arg_offset = infp->growth[0] - 4;
1889  infp->reg_offset = 0;
1890
1891  if (frame_pointer_needed)
1892    {
1893      if (infp->local_size % STACK_BYTES != 0)
1894	infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES);
1895
1896      infp->growth[growths++] = infp->local_size + infp->pad_local;
1897      infp->local_growth = growths;
1898
1899      infp->growth[growths++] = outbounds;
1900    }
1901  else
1902    {
1903      if ((infp->local_size + outbounds) % STACK_BYTES != 0)
1904	infp->pad_local = STACK_BYTES - ((infp->local_size + outbounds) % STACK_BYTES);
1905
1906      infp->growth[growths++] = infp->local_size + infp->pad_local + outbounds;
1907      infp->local_growth = growths;
1908    }
1909
1910  /* Anything else that we've forgotten?, plus a few consistency checks.  */
1911 finish:
1912  gcc_assert (infp->reg_offset >= 0);
1913  gcc_assert (growths <= MAX_STACK_GROWS);
1914
1915  for (i = 0; i < growths; i++)
1916    gcc_assert (!(infp->growth[i] % STACK_BYTES));
1917}
1918
1919/* Define the offset between two registers, one to be eliminated, and
1920   the other its replacement, at the start of a routine.  */
1921
1922int
1923mcore_initial_elimination_offset (int from, int to)
1924{
1925  int above_frame;
1926  int below_frame;
1927  struct mcore_frame fi;
1928
1929  layout_mcore_frame (& fi);
1930
1931  /* fp to ap */
1932  above_frame = fi.local_size + fi.pad_local + fi.reg_size + fi.pad_reg;
1933  /* sp to fp */
1934  below_frame = fi.outbound_size + fi.pad_outbound;
1935
1936  if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
1937    return above_frame;
1938
1939  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
1940    return above_frame + below_frame;
1941
1942  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
1943    return below_frame;
1944
1945  gcc_unreachable ();
1946}
1947
1948/* Keep track of some information about varargs for the prolog.  */
1949
1950static void
1951mcore_setup_incoming_varargs (cumulative_args_t args_so_far_v,
1952			      machine_mode mode, tree type,
1953			      int * ptr_pretend_size ATTRIBUTE_UNUSED,
1954			      int second_time ATTRIBUTE_UNUSED)
1955{
1956  CUMULATIVE_ARGS *args_so_far = get_cumulative_args (args_so_far_v);
1957
1958  current_function_anonymous_args = 1;
1959
1960  /* We need to know how many argument registers are used before
1961     the varargs start, so that we can push the remaining argument
1962     registers during the prologue.  */
1963  number_of_regs_before_varargs = *args_so_far + mcore_num_arg_regs (mode, type);
1964
1965  /* There is a bug somewhere in the arg handling code.
1966     Until I can find it this workaround always pushes the
1967     last named argument onto the stack.  */
1968  number_of_regs_before_varargs = *args_so_far;
1969
1970  /* The last named argument may be split between argument registers
1971     and the stack.  Allow for this here.  */
1972  if (number_of_regs_before_varargs > NPARM_REGS)
1973    number_of_regs_before_varargs = NPARM_REGS;
1974}
1975
1976void
1977mcore_expand_prolog (void)
1978{
1979  struct mcore_frame fi;
1980  int space_allocated = 0;
1981  int growth = 0;
1982
1983  /* Find out what we're doing.  */
1984  layout_mcore_frame (&fi);
1985
1986  space_allocated = fi.arg_size + fi.reg_size + fi.local_size +
1987    fi.outbound_size + fi.pad_outbound + fi.pad_local + fi.pad_reg;
1988
1989  if (TARGET_CG_DATA)
1990    {
1991      /* Emit a symbol for this routine's frame size.  */
1992      rtx x;
1993
1994      x = DECL_RTL (current_function_decl);
1995
1996      gcc_assert (GET_CODE (x) == MEM);
1997
1998      x = XEXP (x, 0);
1999
2000      gcc_assert (GET_CODE (x) == SYMBOL_REF);
2001
2002      free (mcore_current_function_name);
2003
2004      mcore_current_function_name = xstrdup (XSTR (x, 0));
2005
2006      ASM_OUTPUT_CG_NODE (asm_out_file, mcore_current_function_name, space_allocated);
2007
2008      if (cfun->calls_alloca)
2009	ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name, "alloca", 1);
2010
2011      /* 970425: RBE:
2012         We're looking at how the 8byte alignment affects stack layout
2013         and where we had to pad things. This emits information we can
2014         extract which tells us about frame sizes and the like.  */
2015      fprintf (asm_out_file,
2016	       "\t.equ\t__$frame$info$_%s_$_%d_%d_x%x_%d_%d_%d,0\n",
2017	       mcore_current_function_name,
2018	       fi.arg_size, fi.reg_size, fi.reg_mask,
2019	       fi.local_size, fi.outbound_size,
2020	       frame_pointer_needed);
2021    }
2022
2023  if (mcore_naked_function_p ())
2024    return;
2025
2026  /* Handle stdarg+regsaves in one shot: can't be more than 64 bytes.  */
2027  output_stack_adjust (-1, fi.growth[growth++]);	/* Grows it.  */
2028
2029  /* If we have a parameter passed partially in regs and partially in memory,
2030     the registers will have been stored to memory already in function.c.  So
2031     we only need to do something here for varargs functions.  */
2032  if (fi.arg_size != 0 && crtl->args.pretend_args_size == 0)
2033    {
2034      int offset;
2035      int rn = FIRST_PARM_REG + NPARM_REGS - 1;
2036      int remaining = fi.arg_size;
2037
2038      for (offset = fi.arg_offset; remaining >= 4; offset -= 4, rn--, remaining -= 4)
2039        {
2040          emit_insn (gen_movsi
2041                     (gen_rtx_MEM (SImode,
2042				   plus_constant (Pmode, stack_pointer_rtx,
2043						  offset)),
2044                      gen_rtx_REG (SImode, rn)));
2045        }
2046    }
2047
2048  /* Do we need another stack adjustment before we do the register saves?  */
2049  if (growth < fi.reg_growth)
2050    output_stack_adjust (-1, fi.growth[growth++]);		/* Grows it.  */
2051
2052  if (fi.reg_size != 0)
2053    {
2054      int i;
2055      int offs = fi.reg_offset;
2056
2057      for (i = 15; i >= 0; i--)
2058        {
2059          if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000))
2060	    {
2061	      int first_reg = 15;
2062
2063	      while (fi.reg_mask & (1 << first_reg))
2064	        first_reg--;
2065	      first_reg++;
2066
2067	      emit_insn (gen_store_multiple (gen_rtx_MEM (SImode, stack_pointer_rtx),
2068					     gen_rtx_REG (SImode, first_reg),
2069					     GEN_INT (16 - first_reg)));
2070
2071	      i -= (15 - first_reg);
2072	      offs += (16 - first_reg) * 4;
2073	    }
2074          else if (fi.reg_mask & (1 << i))
2075	    {
2076	      emit_insn (gen_movsi
2077		         (gen_rtx_MEM (SImode,
2078				       plus_constant (Pmode, stack_pointer_rtx,
2079						      offs)),
2080		          gen_rtx_REG (SImode, i)));
2081	      offs += 4;
2082	    }
2083        }
2084    }
2085
2086  /* Figure the locals + outbounds.  */
2087  if (frame_pointer_needed)
2088    {
2089      /* If we haven't already purchased to 'fp'.  */
2090      if (growth < fi.local_growth)
2091        output_stack_adjust (-1, fi.growth[growth++]);		/* Grows it.  */
2092
2093      emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
2094
2095      /* ... and then go any remaining distance for outbounds, etc.  */
2096      if (fi.growth[growth])
2097        output_stack_adjust (-1, fi.growth[growth++]);
2098    }
2099  else
2100    {
2101      if (growth < fi.local_growth)
2102        output_stack_adjust (-1, fi.growth[growth++]);		/* Grows it.  */
2103      if (fi.growth[growth])
2104        output_stack_adjust (-1, fi.growth[growth++]);
2105    }
2106}
2107
2108void
2109mcore_expand_epilog (void)
2110{
2111  struct mcore_frame fi;
2112  int i;
2113  int offs;
2114  int growth = MAX_STACK_GROWS - 1 ;
2115
2116
2117  /* Find out what we're doing.  */
2118  layout_mcore_frame(&fi);
2119
2120  if (mcore_naked_function_p ())
2121    return;
2122
2123  /* If we had a frame pointer, restore the sp from that.  */
2124  if (frame_pointer_needed)
2125    {
2126      emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
2127      growth = fi.local_growth - 1;
2128    }
2129  else
2130    {
2131      /* XXX: while loop should accumulate and do a single sell.  */
2132      while (growth >= fi.local_growth)
2133        {
2134          if (fi.growth[growth] != 0)
2135            output_stack_adjust (1, fi.growth[growth]);
2136	  growth--;
2137        }
2138    }
2139
2140  /* Make sure we've shrunk stack back to the point where the registers
2141     were laid down. This is typically 0/1 iterations.  Then pull the
2142     register save information back off the stack.  */
2143  while (growth >= fi.reg_growth)
2144    output_stack_adjust ( 1, fi.growth[growth--]);
2145
2146  offs = fi.reg_offset;
2147
2148  for (i = 15; i >= 0; i--)
2149    {
2150      if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000))
2151	{
2152	  int first_reg;
2153
2154	  /* Find the starting register.  */
2155	  first_reg = 15;
2156
2157	  while (fi.reg_mask & (1 << first_reg))
2158	    first_reg--;
2159
2160	  first_reg++;
2161
2162	  emit_insn (gen_load_multiple (gen_rtx_REG (SImode, first_reg),
2163					gen_rtx_MEM (SImode, stack_pointer_rtx),
2164					GEN_INT (16 - first_reg)));
2165
2166	  i -= (15 - first_reg);
2167	  offs += (16 - first_reg) * 4;
2168	}
2169      else if (fi.reg_mask & (1 << i))
2170	{
2171	  emit_insn (gen_movsi
2172		     (gen_rtx_REG (SImode, i),
2173		      gen_rtx_MEM (SImode,
2174				   plus_constant (Pmode, stack_pointer_rtx,
2175						  offs))));
2176	  offs += 4;
2177	}
2178    }
2179
2180  /* Give back anything else.  */
2181  /* XXX: Should accumulate total and then give it back.  */
2182  while (growth >= 0)
2183    output_stack_adjust ( 1, fi.growth[growth--]);
2184}
2185
2186/* This code is borrowed from the SH port.  */
2187
2188/* The MCORE cannot load a large constant into a register, constants have to
2189   come from a pc relative load.  The reference of a pc relative load
2190   instruction must be less than 1k in front of the instruction.  This
2191   means that we often have to dump a constant inside a function, and
2192   generate code to branch around it.
2193
2194   It is important to minimize this, since the branches will slow things
2195   down and make things bigger.
2196
2197   Worst case code looks like:
2198
2199   lrw   L1,r0
2200   br    L2
2201   align
2202   L1:   .long value
2203   L2:
2204   ..
2205
2206   lrw   L3,r0
2207   br    L4
2208   align
2209   L3:   .long value
2210   L4:
2211   ..
2212
2213   We fix this by performing a scan before scheduling, which notices which
2214   instructions need to have their operands fetched from the constant table
2215   and builds the table.
2216
2217   The algorithm is:
2218
2219   scan, find an instruction which needs a pcrel move.  Look forward, find the
2220   last barrier which is within MAX_COUNT bytes of the requirement.
2221   If there isn't one, make one.  Process all the instructions between
2222   the find and the barrier.
2223
2224   In the above example, we can tell that L3 is within 1k of L1, so
2225   the first move can be shrunk from the 2 insn+constant sequence into
2226   just 1 insn, and the constant moved to L3 to make:
2227
2228   lrw          L1,r0
2229   ..
2230   lrw          L3,r0
2231   bra          L4
2232   align
2233   L3:.long value
2234   L4:.long value
2235
2236   Then the second move becomes the target for the shortening process.  */
2237
2238typedef struct
2239{
2240  rtx value;			/* Value in table.  */
2241  rtx label;			/* Label of value.  */
2242} pool_node;
2243
2244/* The maximum number of constants that can fit into one pool, since
2245   the pc relative range is 0...1020 bytes and constants are at least 4
2246   bytes long.  We subtract 4 from the range to allow for the case where
2247   we need to add a branch/align before the constant pool.  */
2248
2249#define MAX_COUNT 1016
2250#define MAX_POOL_SIZE (MAX_COUNT/4)
2251static pool_node pool_vector[MAX_POOL_SIZE];
2252static int pool_size;
2253
2254/* Dump out any constants accumulated in the final pass.  These
2255   will only be labels.  */
2256
2257const char *
2258mcore_output_jump_label_table (void)
2259{
2260  int i;
2261
2262  if (pool_size)
2263    {
2264      fprintf (asm_out_file, "\t.align 2\n");
2265
2266      for (i = 0; i < pool_size; i++)
2267	{
2268	  pool_node * p = pool_vector + i;
2269
2270	  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (p->label));
2271
2272	  output_asm_insn (".long	%0", &p->value);
2273	}
2274
2275      pool_size = 0;
2276    }
2277
2278  return "";
2279}
2280
2281/* Check whether insn is a candidate for a conditional.  */
2282
2283static cond_type
2284is_cond_candidate (rtx insn)
2285{
2286  /* The only things we conditionalize are those that can be directly
2287     changed into a conditional.  Only bother with SImode items.  If
2288     we wanted to be a little more aggressive, we could also do other
2289     modes such as DImode with reg-reg move or load 0.  */
2290  if (NONJUMP_INSN_P (insn))
2291    {
2292      rtx pat = PATTERN (insn);
2293      rtx src, dst;
2294
2295      if (GET_CODE (pat) != SET)
2296	return COND_NO;
2297
2298      dst = XEXP (pat, 0);
2299
2300      if ((GET_CODE (dst) != REG &&
2301           GET_CODE (dst) != SUBREG) ||
2302	  GET_MODE (dst) != SImode)
2303	return COND_NO;
2304
2305      src = XEXP (pat, 1);
2306
2307      if ((GET_CODE (src) == REG ||
2308           (GET_CODE (src) == SUBREG &&
2309	    GET_CODE (SUBREG_REG (src)) == REG)) &&
2310	  GET_MODE (src) == SImode)
2311	return COND_MOV_INSN;
2312      else if (GET_CODE (src) == CONST_INT &&
2313               INTVAL (src) == 0)
2314	return COND_CLR_INSN;
2315      else if (GET_CODE (src) == PLUS &&
2316               (GET_CODE (XEXP (src, 0)) == REG ||
2317                (GET_CODE (XEXP (src, 0)) == SUBREG &&
2318                 GET_CODE (SUBREG_REG (XEXP (src, 0))) == REG)) &&
2319               GET_MODE (XEXP (src, 0)) == SImode &&
2320               GET_CODE (XEXP (src, 1)) == CONST_INT &&
2321               INTVAL (XEXP (src, 1)) == 1)
2322	return COND_INC_INSN;
2323      else if (((GET_CODE (src) == MINUS &&
2324		 GET_CODE (XEXP (src, 1)) == CONST_INT &&
2325		 INTVAL( XEXP (src, 1)) == 1) ||
2326                (GET_CODE (src) == PLUS &&
2327		 GET_CODE (XEXP (src, 1)) == CONST_INT &&
2328		 INTVAL (XEXP (src, 1)) == -1)) &&
2329               (GET_CODE (XEXP (src, 0)) == REG ||
2330		(GET_CODE (XEXP (src, 0)) == SUBREG &&
2331		 GET_CODE (SUBREG_REG (XEXP (src, 0))) == REG)) &&
2332               GET_MODE (XEXP (src, 0)) == SImode)
2333	return COND_DEC_INSN;
2334
2335      /* Some insns that we don't bother with:
2336	 (set (rx:DI) (ry:DI))
2337	 (set (rx:DI) (const_int 0))
2338      */
2339
2340    }
2341  else if (JUMP_P (insn)
2342	   && GET_CODE (PATTERN (insn)) == SET
2343	   && GET_CODE (XEXP (PATTERN (insn), 1)) == LABEL_REF)
2344    return COND_BRANCH_INSN;
2345
2346  return COND_NO;
2347}
2348
2349/* Emit a conditional version of insn and replace the old insn with the
2350   new one.  Return the new insn if emitted.  */
2351
2352static rtx_insn *
2353emit_new_cond_insn (rtx insn, int cond)
2354{
2355  rtx c_insn = 0;
2356  rtx pat, dst, src;
2357  cond_type num;
2358
2359  if ((num = is_cond_candidate (insn)) == COND_NO)
2360    return NULL;
2361
2362  pat = PATTERN (insn);
2363
2364  if (NONJUMP_INSN_P (insn))
2365    {
2366      dst = SET_DEST (pat);
2367      src = SET_SRC (pat);
2368    }
2369  else
2370    {
2371      dst = JUMP_LABEL (insn);
2372      src = NULL_RTX;
2373    }
2374
2375  switch (num)
2376    {
2377    case COND_MOV_INSN:
2378    case COND_CLR_INSN:
2379      if (cond)
2380	c_insn = gen_movt0 (dst, src, dst);
2381      else
2382	c_insn = gen_movt0 (dst, dst, src);
2383      break;
2384
2385    case COND_INC_INSN:
2386      if (cond)
2387	c_insn = gen_incscc (dst, dst);
2388      else
2389	c_insn = gen_incscc_false (dst, dst);
2390      break;
2391
2392    case COND_DEC_INSN:
2393      if (cond)
2394	c_insn = gen_decscc (dst, dst);
2395      else
2396	c_insn = gen_decscc_false (dst, dst);
2397      break;
2398
2399    case COND_BRANCH_INSN:
2400      if (cond)
2401	c_insn = gen_branch_true (dst);
2402      else
2403	c_insn = gen_branch_false (dst);
2404      break;
2405
2406    default:
2407      return NULL;
2408    }
2409
2410  /* Only copy the notes if they exist.  */
2411  if (rtx_length [GET_CODE (c_insn)] >= 7 && rtx_length [GET_CODE (insn)] >= 7)
2412    {
2413      /* We really don't need to bother with the notes and links at this
2414	 point, but go ahead and save the notes.  This will help is_dead()
2415	 when applying peepholes (links don't matter since they are not
2416	 used any more beyond this point for the mcore).  */
2417      REG_NOTES (c_insn) = REG_NOTES (insn);
2418    }
2419
2420  if (num == COND_BRANCH_INSN)
2421    {
2422      /* For jumps, we need to be a little bit careful and emit the new jump
2423         before the old one and to update the use count for the target label.
2424         This way, the barrier following the old (uncond) jump will get
2425	 deleted, but the label won't.  */
2426      c_insn = emit_jump_insn_before (c_insn, insn);
2427
2428      ++ LABEL_NUSES (dst);
2429
2430      JUMP_LABEL (c_insn) = dst;
2431    }
2432  else
2433    c_insn = emit_insn_after (c_insn, insn);
2434
2435  delete_insn (insn);
2436
2437  return as_a <rtx_insn *> (c_insn);
2438}
2439
2440/* Attempt to change a basic block into a series of conditional insns.  This
2441   works by taking the branch at the end of the 1st block and scanning for the
2442   end of the 2nd block.  If all instructions in the 2nd block have cond.
2443   versions and the label at the start of block 3 is the same as the target
2444   from the branch at block 1, then conditionalize all insn in block 2 using
2445   the inverse condition of the branch at block 1.  (Note I'm bending the
2446   definition of basic block here.)
2447
2448   e.g., change:
2449
2450		bt	L2             <-- end of block 1 (delete)
2451		mov	r7,r8
2452		addu	r7,1
2453		br	L3             <-- end of block 2
2454
2455	L2:	...                    <-- start of block 3 (NUSES==1)
2456	L3:	...
2457
2458   to:
2459
2460		movf	r7,r8
2461		incf	r7
2462		bf	L3
2463
2464	L3:	...
2465
2466   we can delete the L2 label if NUSES==1 and re-apply the optimization
2467   starting at the last instruction of block 2.  This may allow an entire
2468   if-then-else statement to be conditionalized.  BRC  */
2469static rtx_insn *
2470conditionalize_block (rtx_insn *first)
2471{
2472  rtx_insn *insn;
2473  rtx br_pat;
2474  rtx_insn *end_blk_1_br = 0;
2475  rtx_insn *end_blk_2_insn = 0;
2476  rtx_insn *start_blk_3_lab = 0;
2477  int cond;
2478  int br_lab_num;
2479  int blk_size = 0;
2480
2481
2482  /* Check that the first insn is a candidate conditional jump.  This is
2483     the one that we'll eliminate.  If not, advance to the next insn to
2484     try.  */
2485  if (! JUMP_P (first)
2486      || GET_CODE (PATTERN (first)) != SET
2487      || GET_CODE (XEXP (PATTERN (first), 1)) != IF_THEN_ELSE)
2488    return NEXT_INSN (first);
2489
2490  /* Extract some information we need.  */
2491  end_blk_1_br = first;
2492  br_pat = PATTERN (end_blk_1_br);
2493
2494  /* Complement the condition since we use the reverse cond. for the insns.  */
2495  cond = (GET_CODE (XEXP (XEXP (br_pat, 1), 0)) == EQ);
2496
2497  /* Determine what kind of branch we have.  */
2498  if (GET_CODE (XEXP (XEXP (br_pat, 1), 1)) == LABEL_REF)
2499    {
2500      /* A normal branch, so extract label out of first arm.  */
2501      br_lab_num = CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (br_pat, 1), 1), 0));
2502    }
2503  else
2504    {
2505      /* An inverse branch, so extract the label out of the 2nd arm
2506	 and complement the condition.  */
2507      cond = (cond == 0);
2508      br_lab_num = CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (br_pat, 1), 2), 0));
2509    }
2510
2511  /* Scan forward for the start of block 2: it must start with a
2512     label and that label must be the same as the branch target
2513     label from block 1.  We don't care about whether block 2 actually
2514     ends with a branch or a label (an uncond. branch is
2515     conditionalizable).  */
2516  for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn))
2517    {
2518      enum rtx_code code;
2519
2520      code = GET_CODE (insn);
2521
2522      /* Look for the label at the start of block 3.  */
2523      if (code == CODE_LABEL && CODE_LABEL_NUMBER (insn) == br_lab_num)
2524	break;
2525
2526      /* Skip barriers, notes, and conditionalizable insns.  If the
2527         insn is not conditionalizable or makes this optimization fail,
2528         just return the next insn so we can start over from that point.  */
2529      if (code != BARRIER && code != NOTE && !is_cond_candidate (insn))
2530	return NEXT_INSN (insn);
2531
2532      /* Remember the last real insn before the label (i.e. end of block 2).  */
2533      if (code == JUMP_INSN || code == INSN)
2534	{
2535	  blk_size ++;
2536	  end_blk_2_insn = insn;
2537	}
2538    }
2539
2540  if (!insn)
2541    return insn;
2542
2543  /* It is possible for this optimization to slow performance if the blocks
2544     are long.  This really depends upon whether the branch is likely taken
2545     or not.  If the branch is taken, we slow performance in many cases.  But,
2546     if the branch is not taken, we always help performance (for a single
2547     block, but for a double block (i.e. when the optimization is re-applied)
2548     this is not true since the 'right thing' depends on the overall length of
2549     the collapsed block).  As a compromise, don't apply this optimization on
2550     blocks larger than size 2 (unlikely for the mcore) when speed is important.
2551     the best threshold depends on the latencies of the instructions (i.e.,
2552     the branch penalty).  */
2553  if (optimize > 1 && blk_size > 2)
2554    return insn;
2555
2556  /* At this point, we've found the start of block 3 and we know that
2557     it is the destination of the branch from block 1.   Also, all
2558     instructions in the block 2 are conditionalizable.  So, apply the
2559     conditionalization and delete the branch.  */
2560  start_blk_3_lab = insn;
2561
2562  for (insn = NEXT_INSN (end_blk_1_br); insn != start_blk_3_lab;
2563       insn = NEXT_INSN (insn))
2564    {
2565      rtx_insn *newinsn;
2566
2567      if (insn->deleted ())
2568	continue;
2569
2570      /* Try to form a conditional variant of the instruction and emit it.  */
2571      if ((newinsn = emit_new_cond_insn (insn, cond)))
2572	{
2573	  if (end_blk_2_insn == insn)
2574            end_blk_2_insn = newinsn;
2575
2576	  insn = newinsn;
2577	}
2578    }
2579
2580  /* Note whether we will delete the label starting blk 3 when the jump
2581     gets deleted.  If so, we want to re-apply this optimization at the
2582     last real instruction right before the label.  */
2583  if (LABEL_NUSES (start_blk_3_lab) == 1)
2584    {
2585      start_blk_3_lab = 0;
2586    }
2587
2588  /* ??? we probably should redistribute the death notes for this insn, esp.
2589     the death of cc, but it doesn't really matter this late in the game.
2590     The peepholes all use is_dead() which will find the correct death
2591     regardless of whether there is a note.  */
2592  delete_insn (end_blk_1_br);
2593
2594  if (! start_blk_3_lab)
2595    return end_blk_2_insn;
2596
2597  /* Return the insn right after the label at the start of block 3.  */
2598  return NEXT_INSN (start_blk_3_lab);
2599}
2600
2601/* Apply the conditionalization of blocks optimization.  This is the
2602   outer loop that traverses through the insns scanning for a branch
2603   that signifies an opportunity to apply the optimization.  Note that
2604   this optimization is applied late.  If we could apply it earlier,
2605   say before cse 2, it may expose more optimization opportunities.
2606   but, the pay back probably isn't really worth the effort (we'd have
2607   to update all reg/flow/notes/links/etc to make it work - and stick it
2608   in before cse 2).  */
2609
2610static void
2611conditionalize_optimization (void)
2612{
2613  rtx_insn *insn;
2614
2615  for (insn = get_insns (); insn; insn = conditionalize_block (insn))
2616    continue;
2617}
2618
2619/* This is to handle loads from the constant pool.  */
2620
2621static void
2622mcore_reorg (void)
2623{
2624  /* Reset this variable.  */
2625  current_function_anonymous_args = 0;
2626
2627  if (optimize == 0)
2628    return;
2629
2630  /* Conditionalize blocks where we can.  */
2631  conditionalize_optimization ();
2632
2633  /* Literal pool generation is now pushed off until the assembler.  */
2634}
2635
2636
2637/* Return true if X is something that can be moved directly into r15.  */
2638
2639bool
2640mcore_r15_operand_p (rtx x)
2641{
2642  switch (GET_CODE (x))
2643    {
2644    case CONST_INT:
2645      return mcore_const_ok_for_inline (INTVAL (x));
2646
2647    case REG:
2648    case SUBREG:
2649    case MEM:
2650      return 1;
2651
2652    default:
2653      return 0;
2654    }
2655}
2656
2657/* Implement SECONDARY_RELOAD_CLASS.  If RCLASS contains r15, and we can't
2658   directly move X into it, use r1-r14 as a temporary.  */
2659
2660enum reg_class
2661mcore_secondary_reload_class (enum reg_class rclass,
2662			      machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2663{
2664  if (TEST_HARD_REG_BIT (reg_class_contents[rclass], 15)
2665      && !mcore_r15_operand_p (x))
2666    return LRW_REGS;
2667  return NO_REGS;
2668}
2669
2670/* Return the reg_class to use when reloading the rtx X into the class
2671   RCLASS.  If X is too complex to move directly into r15, prefer to
2672   use LRW_REGS instead.  */
2673
2674enum reg_class
2675mcore_reload_class (rtx x, enum reg_class rclass)
2676{
2677  if (reg_class_subset_p (LRW_REGS, rclass) && !mcore_r15_operand_p (x))
2678    return LRW_REGS;
2679
2680  return rclass;
2681}
2682
2683/* Tell me if a pair of reg/subreg rtx's actually refer to the same
2684   register.  Note that the current version doesn't worry about whether
2685   they are the same mode or note (e.g., a QImode in r2 matches an HImode
2686   in r2 matches an SImode in r2. Might think in the future about whether
2687   we want to be able to say something about modes.  */
2688
2689int
2690mcore_is_same_reg (rtx x, rtx y)
2691{
2692  /* Strip any and all of the subreg wrappers.  */
2693  while (GET_CODE (x) == SUBREG)
2694    x = SUBREG_REG (x);
2695
2696  while (GET_CODE (y) == SUBREG)
2697    y = SUBREG_REG (y);
2698
2699  if (GET_CODE(x) == REG && GET_CODE(y) == REG && REGNO(x) == REGNO(y))
2700    return 1;
2701
2702  return 0;
2703}
2704
2705static void
2706mcore_option_override (void)
2707{
2708  /* Only the m340 supports little endian code.  */
2709  if (TARGET_LITTLE_END && ! TARGET_M340)
2710    target_flags |= MASK_M340;
2711}
2712
2713
2714/* Compute the number of word sized registers needed to
2715   hold a function argument of mode MODE and type TYPE.  */
2716
2717int
2718mcore_num_arg_regs (machine_mode mode, const_tree type)
2719{
2720  int size;
2721
2722  if (targetm.calls.must_pass_in_stack (mode, type))
2723    return 0;
2724
2725  if (type && mode == BLKmode)
2726    size = int_size_in_bytes (type);
2727  else
2728    size = GET_MODE_SIZE (mode);
2729
2730  return ROUND_ADVANCE (size);
2731}
2732
2733static rtx
2734handle_structs_in_regs (machine_mode mode, const_tree type, int reg)
2735{
2736  int size;
2737
2738  /* The MCore ABI defines that a structure whose size is not a whole multiple
2739     of bytes is passed packed into registers (or spilled onto the stack if
2740     not enough registers are available) with the last few bytes of the
2741     structure being packed, left-justified, into the last register/stack slot.
2742     GCC handles this correctly if the last word is in a stack slot, but we
2743     have to generate a special, PARALLEL RTX if the last word is in an
2744     argument register.  */
2745  if (type
2746      && TYPE_MODE (type) == BLKmode
2747      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
2748      && (size = int_size_in_bytes (type)) > UNITS_PER_WORD
2749      && (size % UNITS_PER_WORD != 0)
2750      && (reg + mcore_num_arg_regs (mode, type) <= (FIRST_PARM_REG + NPARM_REGS)))
2751    {
2752      rtx    arg_regs [NPARM_REGS];
2753      int    nregs;
2754      rtx    result;
2755      rtvec  rtvec;
2756
2757      for (nregs = 0; size > 0; size -= UNITS_PER_WORD)
2758        {
2759          arg_regs [nregs] =
2760	    gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, reg ++),
2761		  	       GEN_INT (nregs * UNITS_PER_WORD));
2762	  nregs ++;
2763        }
2764
2765      /* We assume here that NPARM_REGS == 6.  The assert checks this.  */
2766      gcc_assert (ARRAY_SIZE (arg_regs) == 6);
2767      rtvec = gen_rtvec (nregs, arg_regs[0], arg_regs[1], arg_regs[2],
2768			  arg_regs[3], arg_regs[4], arg_regs[5]);
2769
2770      result = gen_rtx_PARALLEL (mode, rtvec);
2771      return result;
2772    }
2773
2774  return gen_rtx_REG (mode, reg);
2775}
2776
2777rtx
2778mcore_function_value (const_tree valtype, const_tree func)
2779{
2780  machine_mode mode;
2781  int unsigned_p;
2782
2783  mode = TYPE_MODE (valtype);
2784
2785  /* Since we promote return types, we must promote the mode here too.  */
2786  mode = promote_function_mode (valtype, mode, &unsigned_p, func, 1);
2787
2788  return handle_structs_in_regs (mode, valtype, FIRST_RET_REG);
2789}
2790
2791/* Define where to put the arguments to a function.
2792   Value is zero to push the argument on the stack,
2793   or a hard register in which to store the argument.
2794
2795   MODE is the argument's machine mode.
2796   TYPE is the data type of the argument (as a tree).
2797    This is null for libcalls where that information may
2798    not be available.
2799   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2800    the preceding args and about the function being called.
2801   NAMED is nonzero if this argument is a named parameter
2802    (otherwise it is an extra parameter matching an ellipsis).
2803
2804   On MCore the first args are normally in registers
2805   and the rest are pushed.  Any arg that starts within the first
2806   NPARM_REGS words is at least partially passed in a register unless
2807   its data type forbids.  */
2808
2809static rtx
2810mcore_function_arg (cumulative_args_t cum, machine_mode mode,
2811		    const_tree type, bool named)
2812{
2813  int arg_reg;
2814
2815  if (! named || mode == VOIDmode)
2816    return 0;
2817
2818  if (targetm.calls.must_pass_in_stack (mode, type))
2819    return 0;
2820
2821  arg_reg = ROUND_REG (*get_cumulative_args (cum), mode);
2822
2823  if (arg_reg < NPARM_REGS)
2824    return handle_structs_in_regs (mode, type, FIRST_PARM_REG + arg_reg);
2825
2826  return 0;
2827}
2828
2829static void
2830mcore_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
2831			    const_tree type, bool named ATTRIBUTE_UNUSED)
2832{
2833  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2834
2835  *cum = (ROUND_REG (*cum, mode)
2836	  + (int)named * mcore_num_arg_regs (mode, type));
2837}
2838
2839static unsigned int
2840mcore_function_arg_boundary (machine_mode mode,
2841			     const_tree type ATTRIBUTE_UNUSED)
2842{
2843  /* Doubles must be aligned to an 8 byte boundary.  */
2844  return (mode != BLKmode && GET_MODE_SIZE (mode) == 8
2845	  ? BIGGEST_ALIGNMENT
2846	  : PARM_BOUNDARY);
2847}
2848
2849/* Returns the number of bytes of argument registers required to hold *part*
2850   of a parameter of machine mode MODE and type TYPE (which may be NULL if
2851   the type is not known).  If the argument fits entirely in the argument
2852   registers, or entirely on the stack, then 0 is returned.  CUM is the
2853   number of argument registers already used by earlier parameters to
2854   the function.  */
2855
2856static int
2857mcore_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
2858			 tree type, bool named)
2859{
2860  int reg = ROUND_REG (*get_cumulative_args (cum), mode);
2861
2862  if (named == 0)
2863    return 0;
2864
2865  if (targetm.calls.must_pass_in_stack (mode, type))
2866    return 0;
2867
2868  /* REG is not the *hardware* register number of the register that holds
2869     the argument, it is the *argument* register number.  So for example,
2870     the first argument to a function goes in argument register 0, which
2871     translates (for the MCore) into hardware register 2.  The second
2872     argument goes into argument register 1, which translates into hardware
2873     register 3, and so on.  NPARM_REGS is the number of argument registers
2874     supported by the target, not the maximum hardware register number of
2875     the target.  */
2876  if (reg >= NPARM_REGS)
2877    return 0;
2878
2879  /* If the argument fits entirely in registers, return 0.  */
2880  if (reg + mcore_num_arg_regs (mode, type) <= NPARM_REGS)
2881    return 0;
2882
2883  /* The argument overflows the number of available argument registers.
2884     Compute how many argument registers have not yet been assigned to
2885     hold an argument.  */
2886  reg = NPARM_REGS - reg;
2887
2888  /* Return partially in registers and partially on the stack.  */
2889  return reg * UNITS_PER_WORD;
2890}
2891
2892/* Return nonzero if SYMBOL is marked as being dllexport'd.  */
2893
2894int
2895mcore_dllexport_name_p (const char * symbol)
2896{
2897  return symbol[0] == '@' && symbol[1] == 'e' && symbol[2] == '.';
2898}
2899
2900/* Return nonzero if SYMBOL is marked as being dllimport'd.  */
2901
2902int
2903mcore_dllimport_name_p (const char * symbol)
2904{
2905  return symbol[0] == '@' && symbol[1] == 'i' && symbol[2] == '.';
2906}
2907
2908/* Mark a DECL as being dllexport'd.  */
2909
2910static void
2911mcore_mark_dllexport (tree decl)
2912{
2913  const char * oldname;
2914  char * newname;
2915  rtx    rtlname;
2916  tree   idp;
2917
2918  rtlname = XEXP (DECL_RTL (decl), 0);
2919
2920  if (GET_CODE (rtlname) == MEM)
2921    rtlname = XEXP (rtlname, 0);
2922  gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
2923  oldname = XSTR (rtlname, 0);
2924
2925  if (mcore_dllexport_name_p (oldname))
2926    return;  /* Already done.  */
2927
2928  newname = XALLOCAVEC (char, strlen (oldname) + 4);
2929  sprintf (newname, "@e.%s", oldname);
2930
2931  /* We pass newname through get_identifier to ensure it has a unique
2932     address.  RTL processing can sometimes peek inside the symbol ref
2933     and compare the string's addresses to see if two symbols are
2934     identical.  */
2935  /* ??? At least I think that's why we do this.  */
2936  idp = get_identifier (newname);
2937
2938  XEXP (DECL_RTL (decl), 0) =
2939    gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
2940}
2941
2942/* Mark a DECL as being dllimport'd.  */
2943
2944static void
2945mcore_mark_dllimport (tree decl)
2946{
2947  const char * oldname;
2948  char * newname;
2949  tree   idp;
2950  rtx    rtlname;
2951  rtx    newrtl;
2952
2953  rtlname = XEXP (DECL_RTL (decl), 0);
2954
2955  if (GET_CODE (rtlname) == MEM)
2956    rtlname = XEXP (rtlname, 0);
2957  gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
2958  oldname = XSTR (rtlname, 0);
2959
2960  gcc_assert (!mcore_dllexport_name_p (oldname));
2961  if (mcore_dllimport_name_p (oldname))
2962    return; /* Already done.  */
2963
2964  /* ??? One can well ask why we're making these checks here,
2965     and that would be a good question.  */
2966
2967  /* Imported variables can't be initialized.  */
2968  if (TREE_CODE (decl) == VAR_DECL
2969      && !DECL_VIRTUAL_P (decl)
2970      && DECL_INITIAL (decl))
2971    {
2972      error ("initialized variable %q+D is marked dllimport", decl);
2973      return;
2974    }
2975
2976  /* `extern' needn't be specified with dllimport.
2977     Specify `extern' now and hope for the best.  Sigh.  */
2978  if (TREE_CODE (decl) == VAR_DECL
2979      /* ??? Is this test for vtables needed?  */
2980      && !DECL_VIRTUAL_P (decl))
2981    {
2982      DECL_EXTERNAL (decl) = 1;
2983      TREE_PUBLIC (decl) = 1;
2984    }
2985
2986  newname = XALLOCAVEC (char, strlen (oldname) + 11);
2987  sprintf (newname, "@i.__imp_%s", oldname);
2988
2989  /* We pass newname through get_identifier to ensure it has a unique
2990     address.  RTL processing can sometimes peek inside the symbol ref
2991     and compare the string's addresses to see if two symbols are
2992     identical.  */
2993  /* ??? At least I think that's why we do this.  */
2994  idp = get_identifier (newname);
2995
2996  newrtl = gen_rtx_MEM (Pmode,
2997		    gen_rtx_SYMBOL_REF (Pmode,
2998			     IDENTIFIER_POINTER (idp)));
2999  XEXP (DECL_RTL (decl), 0) = newrtl;
3000}
3001
3002static int
3003mcore_dllexport_p (tree decl)
3004{
3005  if (   TREE_CODE (decl) != VAR_DECL
3006      && TREE_CODE (decl) != FUNCTION_DECL)
3007    return 0;
3008
3009  return lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)) != 0;
3010}
3011
3012static int
3013mcore_dllimport_p (tree decl)
3014{
3015  if (   TREE_CODE (decl) != VAR_DECL
3016      && TREE_CODE (decl) != FUNCTION_DECL)
3017    return 0;
3018
3019  return lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl)) != 0;
3020}
3021
3022/* We must mark dll symbols specially.  Definitions of dllexport'd objects
3023   install some info in the .drective (PE) or .exports (ELF) sections.  */
3024
3025static void
3026mcore_encode_section_info (tree decl, rtx rtl ATTRIBUTE_UNUSED, int first ATTRIBUTE_UNUSED)
3027{
3028  /* Mark the decl so we can tell from the rtl whether the object is
3029     dllexport'd or dllimport'd.  */
3030  if (mcore_dllexport_p (decl))
3031    mcore_mark_dllexport (decl);
3032  else if (mcore_dllimport_p (decl))
3033    mcore_mark_dllimport (decl);
3034
3035  /* It might be that DECL has already been marked as dllimport, but
3036     a subsequent definition nullified that.  The attribute is gone
3037     but DECL_RTL still has @i.__imp_foo.  We need to remove that.  */
3038  else if ((TREE_CODE (decl) == FUNCTION_DECL
3039	    || TREE_CODE (decl) == VAR_DECL)
3040	   && DECL_RTL (decl) != NULL_RTX
3041	   && GET_CODE (DECL_RTL (decl)) == MEM
3042	   && GET_CODE (XEXP (DECL_RTL (decl), 0)) == MEM
3043	   && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF
3044	   && mcore_dllimport_name_p (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0)))
3045    {
3046      const char * oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0);
3047      tree idp = get_identifier (oldname + 9);
3048      rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
3049
3050      XEXP (DECL_RTL (decl), 0) = newrtl;
3051
3052      /* We previously set TREE_PUBLIC and DECL_EXTERNAL.
3053	 ??? We leave these alone for now.  */
3054    }
3055}
3056
3057/* Undo the effects of the above.  */
3058
3059static const char *
3060mcore_strip_name_encoding (const char * str)
3061{
3062  return str + (str[0] == '@' ? 3 : 0);
3063}
3064
3065/* MCore specific attribute support.
3066   dllexport - for exporting a function/variable that will live in a dll
3067   dllimport - for importing a function/variable from a dll
3068   naked     - do not create a function prologue/epilogue.  */
3069
3070/* Handle a "naked" attribute; arguments as in
3071   struct attribute_spec.handler.  */
3072
3073static tree
3074mcore_handle_naked_attribute (tree * node, tree name, tree args ATTRIBUTE_UNUSED,
3075			      int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3076{
3077  if (TREE_CODE (*node) != FUNCTION_DECL)
3078    {
3079      warning (OPT_Wattributes, "%qE attribute only applies to functions",
3080	       name);
3081      *no_add_attrs = true;
3082    }
3083
3084  return NULL_TREE;
3085}
3086
3087/* ??? It looks like this is PE specific?  Oh well, this is what the
3088   old code did as well.  */
3089
3090static void
3091mcore_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
3092{
3093  int len;
3094  const char * name;
3095  char * string;
3096  const char * prefix;
3097
3098  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3099
3100  /* Strip off any encoding in name.  */
3101  name = (* targetm.strip_name_encoding) (name);
3102
3103  /* The object is put in, for example, section .text$foo.
3104     The linker will then ultimately place them in .text
3105     (everything from the $ on is stripped).  */
3106  if (TREE_CODE (decl) == FUNCTION_DECL)
3107    prefix = ".text$";
3108  /* For compatibility with EPOC, we ignore the fact that the
3109     section might have relocs against it.  */
3110  else if (decl_readonly_section (decl, 0))
3111    prefix = ".rdata$";
3112  else
3113    prefix = ".data$";
3114
3115  len = strlen (name) + strlen (prefix);
3116  string = XALLOCAVEC (char, len + 1);
3117
3118  sprintf (string, "%s%s", prefix, name);
3119
3120  set_decl_section_name (decl, string);
3121}
3122
3123int
3124mcore_naked_function_p (void)
3125{
3126  return lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) != NULL_TREE;
3127}
3128
3129static bool
3130mcore_warn_func_return (tree decl)
3131{
3132  /* Naked functions are implemented entirely in assembly, including the
3133     return sequence, so suppress warnings about this.  */
3134  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3135}
3136
3137#ifdef OBJECT_FORMAT_ELF
3138static void
3139mcore_asm_named_section (const char *name,
3140			 unsigned int flags ATTRIBUTE_UNUSED,
3141			 tree decl ATTRIBUTE_UNUSED)
3142{
3143  fprintf (asm_out_file, "\t.section %s\n", name);
3144}
3145#endif /* OBJECT_FORMAT_ELF */
3146
3147/* Worker function for TARGET_ASM_EXTERNAL_LIBCALL.  */
3148
3149static void
3150mcore_external_libcall (rtx fun)
3151{
3152  fprintf (asm_out_file, "\t.import\t");
3153  assemble_name (asm_out_file, XSTR (fun, 0));
3154  fprintf (asm_out_file, "\n");
3155}
3156
3157/* Worker function for TARGET_RETURN_IN_MEMORY.  */
3158
3159static bool
3160mcore_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3161{
3162  const HOST_WIDE_INT size = int_size_in_bytes (type);
3163  return (size == -1 || size > 2 * UNITS_PER_WORD);
3164}
3165
3166/* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE.
3167   Output assembler code for a block containing the constant parts
3168   of a trampoline, leaving space for the variable parts.
3169
3170   On the MCore, the trampoline looks like:
3171   	lrw	r1,  function
3172     	lrw	r13, area
3173   	jmp	r13
3174   	or	r0, r0
3175    .literals                                                */
3176
3177static void
3178mcore_asm_trampoline_template (FILE *f)
3179{
3180  fprintf (f, "\t.short	0x7102\n");
3181  fprintf (f, "\t.short	0x7d02\n");
3182  fprintf (f, "\t.short	0x00cd\n");
3183  fprintf (f, "\t.short	0x1e00\n");
3184  fprintf (f, "\t.long	0\n");
3185  fprintf (f, "\t.long	0\n");
3186}
3187
3188/* Worker function for TARGET_TRAMPOLINE_INIT.  */
3189
3190static void
3191mcore_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3192{
3193  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3194  rtx mem;
3195
3196  emit_block_move (m_tramp, assemble_trampoline_template (),
3197		   GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL);
3198
3199  mem = adjust_address (m_tramp, SImode, 8);
3200  emit_move_insn (mem, chain_value);
3201  mem = adjust_address (m_tramp, SImode, 12);
3202  emit_move_insn (mem, fnaddr);
3203}
3204
3205/* Implement TARGET_LEGITIMATE_CONSTANT_P
3206
3207   On the MCore, allow anything but a double.  */
3208
3209static bool
3210mcore_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3211{
3212  return GET_CODE (x) != CONST_DOUBLE;
3213}
3214