1/* Target machine subroutines for TI PRU.
2   Copyright (C) 2014-2020 Free Software Foundation, Inc.
3   Dimitar Dimitrov <dimitar@dinux.eu>
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published
9   by the Free Software Foundation; either version 3, or (at your
10   option) any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING3.  If not see
19   <http://www.gnu.org/licenses/>.  */
20
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "target.h"
28#include "rtl.h"
29#include "tree.h"
30#include "stringpool.h"
31#include "attribs.h"
32#include "df.h"
33#include "memmodel.h"
34#include "tm_p.h"
35#include "optabs.h"
36#include "regs.h"
37#include "emit-rtl.h"
38#include "recog.h"
39#include "diagnostic-core.h"
40#include "output.h"
41#include "insn-attr.h"
42#include "flags.h"
43#include "explow.h"
44#include "calls.h"
45#include "varasm.h"
46#include "expr.h"
47#include "toplev.h"
48#include "langhooks.h"
49#include "cfgrtl.h"
50#include "stor-layout.h"
51#include "dumpfile.h"
52#include "builtins.h"
53#include "pru-protos.h"
54
55/* This file should be included last.  */
56#include "target-def.h"
57
58#define INIT_ARRAY_ENTRY_BYTES	2
59
60/* Global PRU CTABLE entries, filled in by pragmas, and used for fast
61   addressing via LBCO/SBCO instructions.  */
62struct pru_ctable_entry pru_ctable[32];
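
/* For example, a source-level pragma such as

     #pragma ctable_entry 24 0x48040000

   (the index/address pair here is purely illustrative) marks entry 24 as
   valid with the given base, so that memory at that base plus a UBYTE
   offset can be reached through LBCO/SBCO with C24.  */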
63
64/* Forward function declarations.  */
65static bool prologue_saved_reg_p (int);
66static void pru_reorg_loop (rtx_insn *);
67
68struct GTY (()) machine_function
69{
70  /* Current frame information, to be filled in by pru_compute_frame_layout
71     with register save masks, and offsets for the current function.  */
72
73  /* Mask of registers to save.  */
74  HARD_REG_SET save_mask;
75  /* Number of bytes that the entire frame takes up.  */
76  int total_size;
77  /* Number of bytes that variables take up.  */
78  int var_size;
79  /* Number of bytes that outgoing arguments take up.  */
80  int out_args_size;
81  /* Number of bytes needed to store registers in frame.  */
82  int save_reg_size;
83  /* Offset from new stack pointer to store registers.  */
84  int save_regs_offset;
85  /* True if final frame layout is already calculated.  */
86  bool initialized;
87  /* Number of doloop tags used so far.  */
88  int doloop_tags;
89  /* True if the last tag was allocated to a doloop_end.  */
90  bool doloop_tag_from_end;
91};
92
93/* Stack layout and calling conventions.
94
95   The PRU ABI defines r4 as Argument Pointer.  GCC implements the same
96   semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
97   names it FP.  The stack layout is shown below:
98
99       ---------------------- high address
100	| incoming args
101       ------call-boundary---
102	| pretend_args	    ^
103    FP ----------------     | total
104	| save_regs	    | frame
105	---------------	    | size
106	| local vars	    |
107	---------------	    |
108	| outgoing args     V
109    SP ---------------------- low address
110
111 */
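
/* For illustration only (hypothetical sizes, and assuming PRU_STACK_ALIGN
   below does not need to round anything up): a function with 12 bytes of
   local variables, 4 bytes of outgoing arguments and 6 bytes of registers
   to save would get var_size = 12, out_args_size = 4, save_reg_size = 6,
   save_regs_offset = 16 and, with no pretend arguments, total_size = 22
   in pru_compute_frame_layout below.  */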
112
113#define PRU_STACK_ALIGN(LOC)  ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
114
115/* Implement TARGET_COMPUTE_FRAME_LAYOUT.  */
116static void
117pru_compute_frame_layout (void)
118{
119  int regno;
120  HARD_REG_SET *save_mask;
121  int total_size;
122  int var_size;
123  int out_args_size;
124  int save_reg_size;
125
126  gcc_assert (!cfun->machine->initialized);
127
128  save_mask = &cfun->machine->save_mask;
129  CLEAR_HARD_REG_SET (*save_mask);
130
131  var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132  out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133  total_size = var_size + out_args_size;
134
135  /* Calculate space needed for gp registers.  */
136  save_reg_size = 0;
137  for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138    if (prologue_saved_reg_p (regno))
139      {
140	SET_HARD_REG_BIT (*save_mask, regno);
141	save_reg_size += 1;
142      }
143
144  save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145  total_size += save_reg_size;
146  total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
147
148  /* Save other computed information.  */
149  cfun->machine->total_size = total_size;
150  cfun->machine->var_size = var_size;
151  cfun->machine->out_args_size = out_args_size;
152  cfun->machine->save_reg_size = save_reg_size;
153  cfun->machine->initialized = reload_completed;
154  cfun->machine->save_regs_offset = out_args_size + var_size;
155}
156
/* Emit an efficient RTL equivalent of ADD3 with the given const_int for
   frame-related registers.
     op0	  - Destination register.
     op1	  - First addend operand (a register).
     addendum	  - Second addend operand (a constant).
     kind	  - Note kind.  REG_NOTE_MAX if no note must be added.
 */
164static rtx
165pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166		       const enum reg_note kind)
167{
168  rtx insn;
169
170  rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
171
172  if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173    insn = emit_insn (op0_adjust);
174  else
175    {
176      /* Help the compiler to cope with an arbitrary integer constant.
177	 Reload has finished so we can't expect the compiler to
178	 auto-allocate a temporary register.  But we know that call-saved
179	 registers are not live yet, so we utilize them.  */
180      rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181      if (addendum < 0)
182	{
183	  emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184	  insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
185	}
186      else
187	{
188	  emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189	  insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
190	}
191    }
192
193  /* Attach a note indicating what happened.  */
194  if (kind != REG_NOTE_MAX)
195    add_reg_note (insn, kind, copy_rtx (op0_adjust));
196
197  RTX_FRAME_RELATED_P (insn) = 1;
198
199  return insn;
200}
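
/* A sketch of what the above expands to (register names are illustrative):
   an SP adjustment by -8 fits in UBYTE and becomes a single

	sub	sp, sp, 8

   while an adjustment by -1024 must go through the temporary register:

	ldi	<tmp>, 1024
	sub	sp, sp, <tmp>
 */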
201
202/* Add a const_int to the stack pointer register.  */
203static rtx
204pru_add_to_sp (int addendum, const enum reg_note kind)
205{
206  return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207				addendum, kind);
208}
209
210/* Helper function used during prologue/epilogue.  Emits a single LBBO/SBBO
211   instruction for load/store of the next group of consecutive registers.  */
212static int
213xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
214{
215  int regno, nregs, i;
216  rtx addr;
217  rtx_insn *insn;
218
219  nregs = 0;
220
221  /* Skip the empty slots.  */
222  for (; regno_start <= LAST_GP_REGNUM;)
223    if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224      break;
225    else
226      regno_start++;
227
228  /* Find the largest consecutive group of registers to save.  */
229  for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230    if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
231      {
232	regno++;
233	nregs++;
234      }
235    else
236      break;
237
238  if (!nregs)
239    return -1;
240
241  gcc_assert (UBYTE_INT (*sp_offset));
242
243  /* Ok, save this bunch.  */
244  addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
245
246  if (do_store)
247    insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248				       gen_rtx_REG (QImode, regno_start),
249				       GEN_INT (nregs));
250  else
251    insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252				      gen_frame_mem (BLKmode, addr),
253				      GEN_INT (nregs));
254
255  gcc_assert (reload_completed);
256  gcc_assert (insn);
257  emit_insn (insn);
258
259  /* Tag as frame-related.  */
260  RTX_FRAME_RELATED_P (insn) = 1;
261
262  if (!do_store)
263    {
264      /* Tag epilogue unwind notes.  */
265      for (i = regno_start; i < (regno_start + nregs); i++)
266	add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
267    }
268
269  /* Increment and save offset in anticipation of the next register group.  */
270  *sp_offset += nregs * UNITS_PER_WORD;
271
272  return regno_start + nregs;
273}
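
/* As an illustrative example: if the save mask covers the bytes of r3.w2
   together with all of r4 and r5, those hard registers form one consecutive
   run, so a single SBBO (or LBBO on restore) covers them in one call of the
   function above; a gap in the mask simply ends the cluster and the next
   call starts a new one.  */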
274
275/* Emit function prologue.  */
276void
277pru_expand_prologue (void)
278{
279  int regno_start;
280  int total_frame_size;
281  int sp_offset;      /* Offset from base_reg to final stack value.  */
282  int save_regs_base; /* Offset from base_reg to register save area.  */
283  int save_offset;    /* Temporary offset to currently saved register group.  */
284
285  total_frame_size = cfun->machine->total_size;
286
287  if (flag_stack_usage_info)
288    current_function_static_stack_size = total_frame_size;
289
290  /* Decrement the stack pointer.  */
291  if (!UBYTE_INT (total_frame_size))
292    {
293      /* We need an intermediary point, this will point at the spill block.  */
294      pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295		     REG_NOTE_MAX);
296      save_regs_base = 0;
297      sp_offset = -cfun->machine->save_regs_offset;
298    }
299  else if (total_frame_size)
300    {
301      pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302      save_regs_base = cfun->machine->save_regs_offset;
303      sp_offset = 0;
304    }
305  else
306    save_regs_base = sp_offset = 0;
307
308  regno_start = 0;
309  save_offset = save_regs_base;
310  do
311    regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312  while (regno_start >= 0);
313
  /* Set FP before adjusting SP.  This way fp_offset has a better
     chance of fitting in UBYTE.  */
316  if (frame_pointer_needed)
317    {
318      int fp_offset = total_frame_size
319	- crtl->args.pretend_args_size
320	+ sp_offset;
321
322      pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323			     fp_offset, REG_NOTE_MAX);
324    }
325
326  if (sp_offset)
327    pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
328
329  /* If we are profiling, make sure no instructions are scheduled before
330     the call to mcount.  */
331  if (crtl->profile)
332    emit_insn (gen_blockage ());
333}
334
335/* Emit function epilogue.  */
336void
337pru_expand_epilogue (bool sibcall_p)
338{
339  int total_frame_size;
340  int sp_adjust, save_offset;
341  int regno_start;
342
343  if (!sibcall_p && pru_can_use_return_insn ())
344    {
345      emit_jump_insn (gen_return ());
346      return;
347    }
348
349  emit_insn (gen_blockage ());
350
351  total_frame_size = cfun->machine->total_size;
352
353  if (frame_pointer_needed)
354    {
355      /* Recover the stack pointer.  */
356      pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357			     - cfun->machine->save_reg_size,
358			     REG_CFA_ADJUST_CFA);
359
360      save_offset = 0;
361      sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
362    }
363  else if (!UBYTE_INT (total_frame_size))
364    {
365      pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366      save_offset = 0;
367      sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
368    }
369  else
370    {
371      save_offset = cfun->machine->save_regs_offset;
372      sp_adjust = total_frame_size;
373    }
374
375  regno_start = 0;
376  do
377    regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378  while (regno_start >= 0);
379
380  /* Emit a blockage insn here to keep these insns from being moved to
381     an earlier spot in the epilogue.
382
383     This is necessary as we must not cut the stack back before all the
384     restores are finished.  */
385  emit_insn (gen_blockage ());
386
387  if (sp_adjust)
388    pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
389
390  if (!sibcall_p)
391    emit_jump_insn (gen_simple_return ());
392}
393
394/* Implement RETURN_ADDR_RTX.  Note, we do not support moving
395   back to a previous frame.  */
396rtx
397pru_get_return_address (int count)
398{
399  if (count != 0)
400    return NULL_RTX;
401
402  /* Return r3.w2.  */
403  return get_hard_reg_initial_val (HImode, RA_REGNUM);
404}
405
406/* Implement FUNCTION_PROFILER macro.  */
407void
408pru_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
409{
410  fprintf (file, "\tmov\tr1, ra\n");
411  fprintf (file, "\tcall\t_mcount\n");
412  fprintf (file, "\tmov\tra, r1\n");
413}
414
415/* Dump stack layout.  */
416static void
417pru_dump_frame_layout (FILE *file)
418{
419  fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420  fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421	   cfun->machine->total_size);
422  fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423	   cfun->machine->var_size);
424  fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425	   cfun->machine->out_args_size);
426  fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427	   cfun->machine->save_reg_size);
428  fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429	   cfun->machine->initialized);
430  fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431	   cfun->machine->save_regs_offset);
432  fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433	   crtl->is_leaf);
434  fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435	   frame_pointer_needed);
436  fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437	   crtl->args.pretend_args_size);
438}
439
440/* Return true if REGNO should be saved in the prologue.  */
441static bool
442prologue_saved_reg_p (int regno)
443{
444  gcc_assert (GP_REG_P (regno));
445
446  if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
447    return true;
448
449  /* 32-bit FP.  */
450  if (frame_pointer_needed
451      && regno >= HARD_FRAME_POINTER_REGNUM
452      && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
453    return true;
454
455  /* 16-bit RA.  */
456  if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
457    return true;
458  if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
459    return true;
460
461  return false;
462}
463
464/* Implement TARGET_CAN_ELIMINATE.  */
465static bool
466pru_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
467{
468  if (to == STACK_POINTER_REGNUM)
469    return !frame_pointer_needed;
470  return true;
471}
472
473/* Implement INITIAL_ELIMINATION_OFFSET macro.  */
474int
475pru_initial_elimination_offset (int from, int to)
476{
477  int offset;
478
479  /* Set OFFSET to the offset from the stack pointer.  */
480  switch (from)
481    {
482    case FRAME_POINTER_REGNUM:
483      offset = cfun->machine->out_args_size;
484      break;
485
486    case ARG_POINTER_REGNUM:
487      offset = cfun->machine->total_size;
488      offset -= crtl->args.pretend_args_size;
489      break;
490
491    default:
492      gcc_unreachable ();
493    }
494
495  /* If we are asked for the frame pointer offset, then adjust OFFSET
496     by the offset from the frame pointer to the stack pointer.  */
497  if (to == HARD_FRAME_POINTER_REGNUM)
498    offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
499
500
501  return offset;
502}
503
504/* Return nonzero if this function is known to have a null epilogue.
505   This allows the optimizer to omit jumps to jumps if no stack
506   was created.  */
507int
508pru_can_use_return_insn (void)
509{
510  if (!reload_completed || crtl->profile)
511    return 0;
512
513  return cfun->machine->total_size == 0;
514}
515
516/* Implement TARGET_HARD_REGNO_MODE_OK.  */
517
518static bool
519pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
520{
521  switch (GET_MODE_SIZE (mode))
522    {
523    case 1: return true;
524    case 2: return (regno % 4) <= 2;
525    case 4: return (regno % 4) == 0;
526    case 8: return (regno % 4) == 0;
527    case 16: return (regno % 4) == 0; /* Not sure why TImode is used.  */
528    case 32: return (regno % 4) == 0; /* Not sure why CTImode is used.  */
529    default:
530      /* TODO: Find out why VOIDmode and BLKmode are passed.  */
531      gcc_assert (mode == BLKmode || mode == VOIDmode);
532      return (regno % 4) == 0;
533    }
534}
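
/* In other words: QImode values may live in any byte subregister, HImode
   only at byte offsets 0..2 of a 32-bit register (the .w0/.w1/.w2 fields),
   and SImode and wider values only at offset 0 (r0, r1, ...).  */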
535
536/* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
537   Returns true if REGNO is safe to be allocated as a scratch
538   register (for a define_peephole2) in the current function.  */
539
540static bool
541pru_hard_regno_scratch_ok (unsigned int regno)
542{
543  /* Don't allow hard registers that might be part of the frame pointer.
544     Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
545     and don't handle a frame pointer that spans more than one register.
546     TODO: Fix those faulty places.  */
547
548  if ((!reload_completed || frame_pointer_needed)
549      && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
550		    HARD_FRAME_POINTER_REGNUM + 3)
551	  || IN_RANGE (regno, FRAME_POINTER_REGNUM,
552		       FRAME_POINTER_REGNUM + 3)))
553    return false;
554
555  return true;
556}
557
558
559/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */
560
561static bool
562pru_hard_regno_call_part_clobbered (unsigned, unsigned regno,
563				    machine_mode mode)
564{
565  HARD_REG_SET caller_saved_set;
566  HARD_REG_SET callee_saved_set;
567
568  CLEAR_HARD_REG_SET (caller_saved_set);
569  CLEAR_HARD_REG_SET (callee_saved_set);
570
571  /* r0 and r1 are caller saved.  */
572  add_range_to_hard_reg_set (&caller_saved_set, 0, 2 * 4);
573
574  add_range_to_hard_reg_set (&caller_saved_set, FIRST_ARG_REGNUM,
575			     LAST_ARG_REGNUM + 1 - FIRST_ARG_REGNUM);
576
577  /* Treat SP as callee saved.  */
578  add_range_to_hard_reg_set (&callee_saved_set, STACK_POINTER_REGNUM, 4);
579
580  /* r3 to r13 are callee saved.  */
581  add_range_to_hard_reg_set (&callee_saved_set, FIRST_CALLEE_SAVED_REGNUM,
582			     LAST_CALEE_SAVED_REGNUM + 1
583			     - FIRST_CALLEE_SAVED_REGNUM);
584
585  return overlaps_hard_reg_set_p (caller_saved_set, mode, regno)
586	 && overlaps_hard_reg_set_p (callee_saved_set, mode, regno);
587}
588
589
590/* Worker function for `HARD_REGNO_RENAME_OK'.
591   Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
592
593int
594pru_hard_regno_rename_ok (unsigned int old_reg,
595			  unsigned int new_reg)
596{
597  /* Don't allow hard registers that might be part of the frame pointer.
598     Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
599     and don't care for a frame pointer that spans more than one register.
600     TODO: Fix those faulty places.  */
601  if ((!reload_completed || frame_pointer_needed)
602      && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
603		    HARD_FRAME_POINTER_REGNUM + 3)
604	  || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
605		       FRAME_POINTER_REGNUM + 3)
606	  || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
607		       HARD_FRAME_POINTER_REGNUM + 3)
608	  || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
609		       FRAME_POINTER_REGNUM + 3)))
610    return 0;
611
612  return 1;
613}
614
615/* Allocate a chunk of memory for per-function machine-dependent data.  */
616static struct machine_function *
617pru_init_machine_status (void)
618{
619  return ggc_cleared_alloc<machine_function> ();
620}
621
622/* Implement TARGET_OPTION_OVERRIDE.  */
623static void
624pru_option_override (void)
625{
626#ifdef SUBTARGET_OVERRIDE_OPTIONS
627  SUBTARGET_OVERRIDE_OPTIONS;
628#endif
629
630  /* Check for unsupported options.  */
631  if (flag_pic == 1)
632    warning (OPT_fpic, "%<-fpic%> is not supported");
633  if (flag_pic == 2)
634    warning (OPT_fPIC, "%<-fPIC%> is not supported");
635  if (flag_pie == 1)
636    warning (OPT_fpie, "%<-fpie%> is not supported");
637  if (flag_pie == 2)
638    warning (OPT_fPIE, "%<-fPIE%> is not supported");
639
640  /* QBxx conditional branching cannot cope with block reordering.  */
641  if (flag_reorder_blocks_and_partition)
642    {
643      inform (input_location, "%<-freorder-blocks-and-partition%> "
644			      "not supported on this architecture");
645      flag_reorder_blocks_and_partition = 0;
646      flag_reorder_blocks = 1;
647    }
648
649  /* Function to allocate machine-dependent function status.  */
650  init_machine_status = &pru_init_machine_status;
651
652  /* Save the initial options in case the user does function specific
653     options.  */
654  target_option_default_node = target_option_current_node
655    = build_target_option_node (&global_options);
656
  /* Due to difficulties in implementing the TI ABI with GCC,
     at least check and error out if GCC cannot produce
     compliant output.  */
660  pru_register_abicheck_pass ();
661}
662
663/* Compute a (partial) cost for rtx X.  Return true if the complete
664   cost has been computed, and false if subexpressions should be
665   scanned.  In either case, *TOTAL contains the cost result.  */
666static bool
667pru_rtx_costs (rtx x, machine_mode mode,
668	       int outer_code, int opno ATTRIBUTE_UNUSED,
669	       int *total, bool speed ATTRIBUTE_UNUSED)
670{
671  const int code = GET_CODE (x);
672
673  switch (code)
674    {
675    case CONST_INT:
676      if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
677	  || (mode != VOIDmode && const_ubyte_operand (x, mode)))
678	{
679	  *total = COSTS_N_INSNS (0);
680	  return true;
681	}
682      else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
683	       || (mode != VOIDmode && const_uhword_operand (x, mode)))
684	{
685	  *total = COSTS_N_INSNS (1);
686	  return true;
687	}
688      else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
689	{
690	  *total = COSTS_N_INSNS (0);
691	  return true;
692	}
693      else
694	{
695	  *total = COSTS_N_INSNS (2);
696	  return true;
697	}
698
699    case LABEL_REF:
700    case SYMBOL_REF:
701    case CONST:
702      {
703	*total = COSTS_N_INSNS (1);
704	return true;
705      }
706    case CONST_DOUBLE:
707      {
708	*total = COSTS_N_INSNS (2);
709	return true;
710      }
711    case CONST_WIDE_INT:
712      {
713	/* PRU declares no vector or very large integer types.  */
714	gcc_unreachable ();
715	return true;
716      }
717    case SET:
718      {
719	int factor;
720
721	/* A SET doesn't have a mode, so let's look at the SET_DEST to get
722	   the mode for the factor.  */
723	mode = GET_MODE (SET_DEST (x));
724
725	/* SI move has the same cost as a QI move.  Moves larger than
726	   64 bits are costly.  */
727	factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
728	*total = factor * COSTS_N_INSNS (1);
729
730	return false;
731      }
732
733    case MULT:
734      {
735	/* Factor in that "mul" requires fixed registers, which
736	   would likely require register moves.  */
737	*total = COSTS_N_INSNS (7);
738	return false;
739      }
740    case PLUS:
741      {
742	rtx op0 = XEXP (x, 0);
743	rtx op1 = XEXP (x, 1);
744	machine_mode op1_mode = GET_MODE (op1);
745
746	/* Generic RTL address expressions do not enforce mode for
747	   offsets, yet our UBYTE constraint requires it.  Fix it here.  */
748	if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
749	  op1_mode = Pmode;
750	if (outer_code == MEM
751	    && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
752		|| ctable_addr_operand (op0, VOIDmode)
753		|| ctable_addr_operand (op1, VOIDmode)
754		|| (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
755		|| (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
756	  {
757	    /* CTABLE or REG base addressing - PLUS comes for free.  */
758	    *total = COSTS_N_INSNS (0);
759	    return true;
760	  }
761	else
762	  {
763	    *total = COSTS_N_INSNS (1);
764	    return false;
765	  }
766	}
767    case SIGN_EXTEND:
768      {
769	*total = COSTS_N_INSNS (3);
770	return false;
771      }
772    case ASHIFTRT:
773      {
774	rtx op1 = XEXP (x, 1);
775	if (const_1_operand (op1, VOIDmode))
776	  *total = COSTS_N_INSNS (3);
777	else
778	  *total = COSTS_N_INSNS (7);
779	return false;
780      }
781    case ZERO_EXTRACT:
782      {
783	rtx op2 = XEXP (x, 2);
784	if ((outer_code == EQ || outer_code == NE)
785	    && CONST_INT_P (op2)
786	    && INTVAL (op2) == 1)
787	  {
788	    /* Branch if bit is set/clear is a single instruction.  */
789	    *total = COSTS_N_INSNS (0);
790	    return true;
791	  }
792	else
793	  {
794	    *total = COSTS_N_INSNS (2);
795	    return false;
796	  }
797      }
798    case ZERO_EXTEND:
799      {
800	*total = COSTS_N_INSNS (0);
801	return false;
802      }
803
804    default:
805      {
806	/* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1.  */
807	int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
808	*total = factor * COSTS_N_INSNS (1);
809	return false;
810      }
811    }
812}
813
814static GTY(()) rtx eqdf_libfunc;
815static GTY(()) rtx nedf_libfunc;
816static GTY(()) rtx ledf_libfunc;
817static GTY(()) rtx ltdf_libfunc;
818static GTY(()) rtx gedf_libfunc;
819static GTY(()) rtx gtdf_libfunc;
820static GTY(()) rtx eqsf_libfunc;
821static GTY(()) rtx nesf_libfunc;
822static GTY(()) rtx lesf_libfunc;
823static GTY(()) rtx ltsf_libfunc;
824static GTY(()) rtx gesf_libfunc;
825static GTY(()) rtx gtsf_libfunc;
826
827/* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
828   functions to match the PRU ABI.  */
829
830static void
831pru_init_libfuncs (void)
832{
833  /* Double-precision floating-point arithmetic.  */
834  set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
835  set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
836  set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
837  set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
838  set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");
839
840  /* Single-precision floating-point arithmetic.  */
841  set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
842  set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
843  set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
844  set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
845  set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");
846
847  /* Floating-point comparisons.  */
848  eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
849  nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
850  lesf_libfunc = init_one_libfunc ("__pruabi_lef");
851  ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
852  gesf_libfunc = init_one_libfunc ("__pruabi_gef");
853  gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
854  eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
855  nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
856  ledf_libfunc = init_one_libfunc ("__pruabi_led");
857  ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
858  gedf_libfunc = init_one_libfunc ("__pruabi_ged");
859  gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");
860
861  /* In PRU ABI, much like other TI processors, floating point
862     comparisons return non-standard values.  This quirk is handled
863     by disabling the optab library functions, and handling the
864     comparison during RTL expansion.  */
865  set_optab_libfunc (eq_optab, SFmode, NULL);
866  set_optab_libfunc (ne_optab, SFmode, NULL);
867  set_optab_libfunc (gt_optab, SFmode, NULL);
868  set_optab_libfunc (ge_optab, SFmode, NULL);
869  set_optab_libfunc (lt_optab, SFmode, NULL);
870  set_optab_libfunc (le_optab, SFmode, NULL);
871  set_optab_libfunc (eq_optab, DFmode, NULL);
872  set_optab_libfunc (ne_optab, DFmode, NULL);
873  set_optab_libfunc (gt_optab, DFmode, NULL);
874  set_optab_libfunc (ge_optab, DFmode, NULL);
875  set_optab_libfunc (lt_optab, DFmode, NULL);
876  set_optab_libfunc (le_optab, DFmode, NULL);
877
878  /* The isunordered function appears to be supported only by GCC.  */
879  set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
880  set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");
881
882  /* Floating-point to integer conversions.  */
883  set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
884  set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
885  set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
886  set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
887  set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
888  set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
889  set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
890  set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");
891
892  /* Conversions between floating types.  */
893  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
894  set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");
895
896  /* Integer to floating-point conversions.  */
897  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
898  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
899  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
900  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
901  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
902  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
903  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
904  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");
905
906  /* Long long.  */
907  set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
908  set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
909  set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
910  set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
911
912  set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
913  set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
914  set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
915  set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
916  set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
917  set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
918  set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
919  set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
920  set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
921  set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
922  set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
923}
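
/* As an example of the effect: after the renaming above, a double-precision
   division is emitted as a call to __pruabi_divd rather than to libgcc's
   default __divdf3, matching the run-time library names mandated by the
   TI PRU ABI.  */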
924
925
926/* Emit comparison instruction if necessary, returning the expression
927   that holds the compare result in the proper mode.  Return the comparison
928   that should be used in the jump insn.  */
929
930rtx
931pru_expand_fp_compare (rtx comparison, machine_mode mode)
932{
933  enum rtx_code code = GET_CODE (comparison);
934  rtx op0 = XEXP (comparison, 0);
935  rtx op1 = XEXP (comparison, 1);
936  rtx cmp;
937  enum rtx_code jump_code = code;
938  machine_mode op_mode = GET_MODE (op0);
939  rtx_insn *insns;
940  rtx libfunc;
941
942  gcc_assert (op_mode == DFmode || op_mode == SFmode);
943
944  /* FP exceptions are not raised by PRU's softfp implementation.  So the
945     following transformations are safe.  */
946  if (code == UNGE)
947    {
948      code = LT;
949      jump_code = EQ;
950    }
951  else if (code == UNLE)
952    {
953      code = GT;
954      jump_code = EQ;
955    }
956  else
957    jump_code = NE;
958
959  switch (code)
960    {
961    case EQ:
962      libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
963      break;
964    case NE:
965      libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
966      break;
967    case GT:
968      libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
969      break;
970    case GE:
971      libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
972      break;
973    case LT:
974      libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
975      break;
976    case LE:
977      libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
978      break;
979    default:
980      gcc_unreachable ();
981    }
982  start_sequence ();
983
984  cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
985				 op0, op_mode, op1, op_mode);
986  insns = get_insns ();
987  end_sequence ();
988
989  emit_libcall_block (insns, cmp, cmp,
990		      gen_rtx_fmt_ee (code, SImode, op0, op1));
991
992  return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
993}
994
995/* Return the sign bit position for given OP's mode.  */
996static int
997sign_bit_position (const rtx op)
998{
999  const int sz = GET_MODE_SIZE (GET_MODE (op));
1000
1001  return sz * 8 - 1;
1002}
1003
1004/* Output asm code for sign_extend operation.  */
1005const char *
1006pru_output_sign_extend (rtx *operands)
1007{
1008  static char buf[512];
1009  int bufi;
1010  const int dst_sz = GET_MODE_SIZE (GET_MODE (operands[0]));
1011  const int src_sz = GET_MODE_SIZE (GET_MODE (operands[1]));
1012  char ext_start;
1013
1014  switch (src_sz)
1015    {
1016    case 1: ext_start = 'y'; break;
1017    case 2: ext_start = 'z'; break;
1018    default: gcc_unreachable ();
1019    }
1020
1021  gcc_assert (dst_sz > src_sz);
1022
1023  /* Note that src and dst can be different parts of the same
1024     register, e.g. "r7, r7.w1".  */
1025  bufi = snprintf (buf, sizeof (buf),
1026	  "mov\t%%0, %%1\n\t"		      /* Copy AND make positive.  */
1027	  "qbbc\t.+8, %%0, %d\n\t"	      /* Check sign bit.  */
1028	  "fill\t%%%c0, %d",		      /* Make negative.  */
1029	  sign_bit_position (operands[1]),
1030	  ext_start,
1031	  dst_sz - src_sz);
1032
1033  gcc_assert (bufi > 0);
1034  gcc_assert ((unsigned int) bufi < sizeof (buf));
1035
1036  return buf;
1037}
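
/* As an illustration (assuming the QImode source lives in r0.b0 and the
   SImode destination is r0), the template above would produce roughly:

	mov	r0, r0.b0
	qbbc	.+8, r0, 7
	fill	r0.b1, 3

   i.e. copy and zero-extend, skip the fill if the sign bit is clear,
   otherwise set the upper three bytes to 0xff.  */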
1038
1039/* Branches and compares.  */
1040
1041/* PRU's ALU does not support signed comparison operations.  That's why we
1042   emulate them.  By first checking the sign bit and handling every possible
1043   operand sign combination, we can simulate signed comparisons in just
1044   5 instructions.  See table below.
1045
1046.-------------------.---------------------------------------------------.
1047| Operand sign bit  | Mapping the signed comparison to an unsigned one  |
1048|---------+---------+------------+------------+------------+------------|
1049| OP1.b31 | OP2.b31 | OP1 < OP2  | OP1 <= OP2 | OP1 > OP2  | OP1 >= OP2 |
1050|---------+---------+------------+------------+------------+------------|
1051| 0       | 0       | OP1 < OP2  | OP1 <= OP2 | OP1 > OP2  | OP1 >= OP2 |
1052|---------+---------+------------+------------+------------+------------|
1053| 0       | 1       | false      | false      | true       | true       |
1054|---------+---------+------------+------------+------------+------------|
1055| 1       | 0       | true       | true       | false      | false      |
1056|---------+---------+------------+------------+------------+------------|
1057| 1       | 1       | OP1 < OP2  | OP1 <= OP2 | OP1 > OP2  | OP1 >= OP2 |
`---------'---------'------------'------------'------------'------------'
1059
1060
1061Given the table above, here is an example for a concrete op:
1062  LT:
1063		    qbbc OP1_POS, OP1, 31
1064  OP1_NEG:	    qbbc BRANCH_TAKEN_LABEL, OP2, 31
1065  OP1_NEG_OP2_NEG:  qblt BRANCH_TAKEN_LABEL, OP2, OP1
1066		    ; jmp OUT -> can be eliminated because we'll take the
1067		    ; following branch.  OP2.b31 is guaranteed to be 1
1068		    ; by the time we get here.
1069  OP1_POS:	    qbbs OUT, OP2, 31
1070  OP1_POS_OP2_POS:  qblt BRANCH_TAKEN_LABEL, OP2, OP1
1071#if FAR_JUMP
1072		    jmp OUT
1073BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1074#endif
1075  OUT:
1076
1077*/
1078
1079/* Output asm code for a signed-compare LT/LE conditional branch.  */
1080static const char *
1081pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1082{
1083  static char buf[1024];
1084  enum rtx_code code = GET_CODE (operands[0]);
1085  rtx op1;
1086  rtx op2;
1087  const char *cmp_opstr;
1088  int bufi = 0;
1089
1090  op1 = operands[1];
1091  op2 = operands[2];
1092
1093  gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1094
1095  /* Determine the comparison operators for positive and negative operands.  */
1096  if (code == LT)
1097    cmp_opstr = "qblt";
1098  else if (code == LE)
1099    cmp_opstr = "qble";
1100  else
1101    gcc_unreachable ();
1102
1103  if (is_near)
1104    bufi = snprintf (buf, sizeof (buf),
1105		     "qbbc\t.+12, %%1, %d\n\t"
1106		     "qbbc\t%%l3, %%2, %d\n\t"  /* OP1_NEG.  */
1107		     "%s\t%%l3, %%2, %%1\n\t"   /* OP1_NEG_OP2_NEG.  */
1108		     "qbbs\t.+8, %%2, %d\n\t"   /* OP1_POS.  */
1109		     "%s\t%%l3, %%2, %%1",	/* OP1_POS_OP2_POS.  */
1110		     sign_bit_position (op1),
1111		     sign_bit_position (op2),
1112		     cmp_opstr,
1113		     sign_bit_position (op2),
1114		     cmp_opstr);
1115  else
1116    bufi = snprintf (buf, sizeof (buf),
1117		     "qbbc\t.+12, %%1, %d\n\t"
1118		     "qbbc\t.+20, %%2, %d\n\t"  /* OP1_NEG.  */
1119		     "%s\t.+16, %%2, %%1\n\t"   /* OP1_NEG_OP2_NEG.  */
1120		     "qbbs\t.+16, %%2, %d\n\t"  /* OP1_POS.  */
1121		     "%s\t.+8, %%2, %%1\n\t"    /* OP1_POS_OP2_POS.  */
1122		     "jmp\t.+8\n\t"		/* jmp OUT.  */
1123		     "jmp\t%%%%label(%%l3)",	/* BRANCH_TAKEN_LABEL.  */
1124		     sign_bit_position (op1),
1125		     sign_bit_position (op2),
1126		     cmp_opstr,
1127		     sign_bit_position (op2),
1128		     cmp_opstr);
1129
1130  gcc_assert (bufi > 0);
1131  gcc_assert ((unsigned int) bufi < sizeof (buf));
1132
1133  return buf;
1134}
1135
1136/* Output asm code for a signed-compare GT/GE conditional branch.  */
1137static const char *
1138pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1139{
1140  static char buf[1024];
1141  enum rtx_code code = GET_CODE (operands[0]);
1142  rtx op1;
1143  rtx op2;
1144  const char *cmp_opstr;
1145  int bufi = 0;
1146
1147  op1 = operands[1];
1148  op2 = operands[2];
1149
1150  gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1151
1152  /* Determine the comparison operators for positive and negative operands.  */
1153  if (code == GT)
1154    cmp_opstr = "qbgt";
1155  else if (code == GE)
1156    cmp_opstr = "qbge";
1157  else
1158    gcc_unreachable ();
1159
1160  if (is_near)
1161    bufi = snprintf (buf, sizeof (buf),
1162		     "qbbs\t.+12, %%1, %d\n\t"
1163		     "qbbs\t%%l3, %%2, %d\n\t"  /* OP1_POS.  */
1164		     "%s\t%%l3, %%2, %%1\n\t"   /* OP1_POS_OP2_POS.  */
1165		     "qbbc\t.+8, %%2, %d\n\t"   /* OP1_NEG.  */
1166		     "%s\t%%l3, %%2, %%1",      /* OP1_NEG_OP2_NEG.  */
1167		     sign_bit_position (op1),
1168		     sign_bit_position (op2),
1169		     cmp_opstr,
1170		     sign_bit_position (op2),
1171		     cmp_opstr);
1172  else
1173    bufi = snprintf (buf, sizeof (buf),
1174		     "qbbs\t.+12, %%1, %d\n\t"
1175		     "qbbs\t.+20, %%2, %d\n\t"  /* OP1_POS.  */
1176		     "%s\t.+16, %%2, %%1\n\t"   /* OP1_POS_OP2_POS.  */
1177		     "qbbc\t.+16, %%2, %d\n\t"  /* OP1_NEG.  */
1178		     "%s\t.+8, %%2, %%1\n\t"    /* OP1_NEG_OP2_NEG.  */
1179		     "jmp\t.+8\n\t"		/* jmp OUT.  */
1180		     "jmp\t%%%%label(%%l3)",	/* BRANCH_TAKEN_LABEL.  */
1181		     sign_bit_position (op1),
1182		     sign_bit_position (op2),
1183		     cmp_opstr,
1184		     sign_bit_position (op2),
1185		     cmp_opstr);
1186
1187  gcc_assert (bufi > 0);
1188  gcc_assert ((unsigned int) bufi < sizeof (buf));
1189
1190  return buf;
1191}
1192
1193/* Output asm code for a signed-compare conditional branch.
1194
1195   If IS_NEAR is true, then QBBx instructions may be used for reaching
1196   the destination label.  Otherwise JMP is used, at the expense of
1197   increased code size.  */
1198const char *
1199pru_output_signed_cbranch (rtx *operands, bool is_near)
1200{
1201  enum rtx_code code = GET_CODE (operands[0]);
1202
1203  if (code == LT || code == LE)
1204    return pru_output_ltle_signed_cbranch (operands, is_near);
1205  else if (code == GT || code == GE)
1206    return pru_output_gtge_signed_cbranch (operands, is_near);
1207  else
1208    gcc_unreachable ();
1209}
1210
1211/* Optimized version of pru_output_signed_cbranch for constant second
1212   operand.  */
1213
1214const char *
1215pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1216{
1217  static char buf[1024];
1218  enum rtx_code code = GET_CODE (operands[0]);
1219  int regop_sign_bit_pos = sign_bit_position (operands[1]);
1220  const char *cmp_opstr;
1221  const char *rcmp_opstr;
1222
  /* PRU comparison instructions demand that the immediate be the last
     operand, so we swap the condition code instead of the operands.  */
1224  code = swap_condition (code);
1225
1226  /* Determine normal and reversed comparison operators for both positive
1227     operands.  This enables us to go completely unsigned.
1228
1229     NOTE: We cannot use the R print modifier because we convert signed
1230     comparison operators to unsigned ones.  */
1231  switch (code)
1232    {
1233    case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1234    case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1235    case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1236    case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1237    default: gcc_unreachable ();
1238    }
1239
1240  /* OP2 is a constant unsigned byte - utilize this info to generate
1241     optimized code.  We can "remove half" of the op table above because
1242     we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255).  */
1243  if (code == LT || code == LE)
1244    {
1245      if (is_near)
1246	snprintf (buf, sizeof (buf),
1247		  "qbbs\t.+8, %%1, %d\n\t"
1248		  "%s\t%%l3, %%1, %%u2",
1249		  regop_sign_bit_pos,
1250		  cmp_opstr);
1251      else
1252	snprintf (buf, sizeof (buf),
1253		  "qbbs\t.+12, %%1, %d\n\t"
1254		  "%s\t.+8, %%1, %%u2\n\t"
1255		  "jmp\t%%%%label(%%l3)",
1256		  regop_sign_bit_pos,
1257		  rcmp_opstr);
1258    }
1259  else if (code == GT || code == GE)
1260    {
1261      if (is_near)
1262	snprintf (buf, sizeof (buf),
1263		  "qbbs\t%%l3, %%1, %d\n\t"
1264		  "%s\t%%l3, %%1, %%u2",
1265		  regop_sign_bit_pos,
1266		  cmp_opstr);
1267      else
1268	snprintf (buf, sizeof (buf),
1269		  "qbbs\t.+8, %%1, %d\n\t"
1270		  "%s\t.+8, %%1, %%u2\n\t"
1271		  "jmp\t%%%%label(%%l3)",
1272		  regop_sign_bit_pos,
1273		  rcmp_opstr);
1274    }
1275  else
1276    gcc_unreachable ();
1277
1278  return buf;
1279}
1280
1281/* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1282   zero second operand.  */
1283
1284const char *
1285pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1286{
1287  static char buf[1024];
1288  enum rtx_code code = GET_CODE (operands[0]);
1289  int regop_sign_bit_pos = sign_bit_position (operands[1]);
1290
1291  /* OP2 is a constant zero - utilize this info to simply check the
1292     OP1 sign bit when comparing for LT or GE.  */
1293  if (code == LT)
1294    {
1295      if (is_near)
	snprintf (buf, sizeof (buf),
		  "qbbs\t%%l3, %%1, %d",
		  regop_sign_bit_pos);
1299      else
1300	snprintf (buf, sizeof (buf),
1301		  "qbbc\t.+8, %%1, %d\n\t"
1302		  "jmp\t%%%%label(%%l3)",
1303		  regop_sign_bit_pos);
1304    }
1305  else if (code == GE)
1306    {
1307      if (is_near)
	snprintf (buf, sizeof (buf),
		  "qbbc\t%%l3, %%1, %d",
		  regop_sign_bit_pos);
1311      else
1312	snprintf (buf, sizeof (buf),
1313		  "qbbs\t.+8, %%1, %d\n\t"
1314		  "jmp\t%%%%label(%%l3)",
1315		  regop_sign_bit_pos);
1316    }
1317  else
1318    gcc_unreachable ();
1319
1320  return buf;
1321}
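
/* For example (illustrative register and label names), a near branch for
   "x < 0" with x in r14 collapses to a single bit test:

	qbbs	.L4, r14, 31
 */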
1322
1323/* Addressing Modes.  */
1324
1325/* Return true if register REGNO is a valid base register.
1326   STRICT_P is true if REG_OK_STRICT is in effect.  */
1327
1328bool
1329pru_regno_ok_for_base_p (int regno, bool strict_p)
1330{
1331  if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1332    return true;
1333
1334  /* The fake registers will be eliminated to either the stack or
1335     hard frame pointer, both of which are usually valid base registers.
1336     Reload deals with the cases where the eliminated form isn't valid.  */
1337  return (GP_REG_P (regno)
1338	  || regno == FRAME_POINTER_REGNUM
1339	  || regno == ARG_POINTER_REGNUM);
1340}
1341
1342/* Return true if given xbbo constant OFFSET is valid.  */
1343static bool
1344pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1345{
1346  bool valid = UBYTE_INT (offset);
1347
1348  /* Reload can split multi word accesses, so make sure we can address
1349     the second word in a DI.  */
1350  if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1351    valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1352
1353  return valid;
1354}
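
/* For instance, offset 252 is accepted for an SImode access but rejected
   for a DImode one, because the highest byte of such an access would sit
   at offset 252 + 7 = 259, which no longer fits in an unsigned byte.  */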
1355
1356/* Recognize a CTABLE base address.  Return CTABLE entry index, or -1 if
1357   base was not found in the pragma-filled pru_ctable.  */
1358int
1359pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1360{
1361  unsigned int i;
1362
1363  for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1364    {
1365      if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1366	return i;
1367    }
1368  return -1;
1369}
1370
1371
1372/* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1373   and return the base CTABLE index if possible.  */
1374int
1375pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1376{
1377  unsigned int i;
1378
1379  for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1380    {
1381      if (pru_ctable[i].valid && IN_RANGE (caddr,
1382					   pru_ctable[i].base,
1383					   pru_ctable[i].base + 0xff))
1384	return i;
1385    }
1386  return -1;
1387}
1388
1389
1390/* Return the offset from some CTABLE base for this address.  */
1391int
1392pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1393{
1394  int i;
1395
1396  i = pru_get_ctable_base_index (caddr);
1397  gcc_assert (i >= 0);
1398
1399  return caddr - pru_ctable[i].base;
1400}
1401
1402/* Return true if the address expression formed by BASE + OFFSET is
1403   valid.
1404
1405   Note that the following address is not handled here:
1406	  base CTABLE constant base + UBYTE constant offset
1407   The constants will be folded.  The ctable_addr_operand predicate will take
1408   care of the validation.  The CTABLE base+offset split will happen during
1409   operand printing.  */
1410static bool
1411pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1412{
1413  if (!strict_p && GET_CODE (base) == SUBREG)
1414    base = SUBREG_REG (base);
1415  if (!strict_p && GET_CODE (offset) == SUBREG)
1416    offset = SUBREG_REG (offset);
1417
1418  if (REG_P (base)
1419      && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1420      && ((CONST_INT_P (offset)
1421	      && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1422	  || (REG_P (offset)
1423	      && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1424    /*     base register + register offset
1425     * OR  base register + UBYTE constant offset.  */
1426    return true;
1427  else if (REG_P (base)
1428	   && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1429	   && ctable_base_operand (offset, VOIDmode))
1430    /*     base CTABLE constant base + register offset
1431     * Note: GCC always puts the register as a first operand of PLUS.  */
1432    return true;
1433  else
1434    return false;
1435}
1436
1437/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
1438static bool
1439pru_legitimate_address_p (machine_mode mode,
1440			    rtx operand, bool strict_p)
1441{
1442  switch (GET_CODE (operand))
1443    {
1444    /* Direct.  */
1445    case SYMBOL_REF:
1446    case LABEL_REF:
1447    case CONST:
1448    case CONST_WIDE_INT:
1449      return false;
1450
1451    case CONST_INT:
1452      return ctable_addr_operand (operand, VOIDmode);
1453
1454      /* Register indirect.  */
1455    case REG:
1456      return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1457
1458      /* Register indirect with displacement.  */
1459    case PLUS:
1460      {
1461	rtx op0 = XEXP (operand, 0);
1462	rtx op1 = XEXP (operand, 1);
1463
1464	return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1465      }
1466
1467    default:
1468      break;
1469    }
1470  return false;
1471}
1472
1473/* Output assembly language related definitions.  */
1474
1475/* Implement TARGET_ASM_CONSTRUCTOR.  */
1476static void
1477pru_elf_asm_constructor (rtx symbol, int priority)
1478{
1479  char buf[23];
1480  section *s;
1481
1482  if (priority == DEFAULT_INIT_PRIORITY)
1483    snprintf (buf, sizeof (buf), ".init_array");
1484  else
1485    {
1486      /* While priority is known to be in range [0, 65535], so 18 bytes
1487	 would be enough, the compiler might not know that.  To avoid
1488	 -Wformat-truncation false positive, use a larger size.  */
1489      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1490    }
1491  s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1492  switch_to_section (s);
1493  assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1494}
1495
1496/* Implement TARGET_ASM_DESTRUCTOR.  */
1497static void
1498pru_elf_asm_destructor (rtx symbol, int priority)
1499{
1500  char buf[23];
1501  section *s;
1502
1503  if (priority == DEFAULT_INIT_PRIORITY)
1504    snprintf (buf, sizeof (buf), ".fini_array");
1505  else
1506    {
1507      /* While priority is known to be in range [0, 65535], so 18 bytes
1508	 would be enough, the compiler might not know that.  To avoid
1509	 -Wformat-truncation false positive, use a larger size.  */
1510      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1511    }
1512  s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1513  switch_to_section (s);
1514  assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1515}
1516
1517/* Map rtx_code to unsigned PRU branch op suffix.  Callers must
1518   handle sign comparison themselves for signed operations.  */
1519static const char *
1520pru_comparison_str (enum rtx_code cond)
1521{
1522  switch (cond)
1523    {
1524    case NE:  return "ne";
1525    case EQ:  return "eq";
1526    case GEU: return "ge";
1527    case GTU: return "gt";
1528    case LEU: return "le";
1529    case LTU: return "lt";
1530    default: gcc_unreachable ();
1531    }
1532}
1533
1534/* Access some RTX as INT_MODE.  If X is a CONST_FIXED we can get
1535   the bit representation of X by "casting" it to CONST_INT.  */
1536
1537static rtx
1538pru_to_int_mode (rtx x)
1539{
1540  machine_mode mode = GET_MODE (x);
1541
1542  return VOIDmode == mode
1543    ? x
1544    : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1545}
1546
/* Translate the MachineDescription notion of consecutive 8-bit
   registers into the PRU assembler syntax of REGWORD[.SUBREG].  */
1550static const char *
1551pru_asm_regname (rtx op)
1552{
1553  static char canon_reg_names[3][LAST_GP_REGNUM][8];
1554  int speci, regi;
1555
1556  gcc_assert (REG_P (op));
1557
1558  if (!canon_reg_names[0][0][0])
1559    {
1560      for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1561	for (speci = 0; speci < 3; speci++)
1562	  {
1563	    const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1564	    if ((regi + sz) > (32 * 4))
1565	      continue;	/* Invalid entry.  */
1566
1567	    /* Construct the lookup table.  */
1568	    const char *suffix = "";
1569
1570	    switch ((sz << 8) | (regi % 4))
1571	      {
1572	      case (1 << 8) | 0: suffix = ".b0"; break;
1573	      case (1 << 8) | 1: suffix = ".b1"; break;
1574	      case (1 << 8) | 2: suffix = ".b2"; break;
1575	      case (1 << 8) | 3: suffix = ".b3"; break;
1576	      case (2 << 8) | 0: suffix = ".w0"; break;
1577	      case (2 << 8) | 1: suffix = ".w1"; break;
1578	      case (2 << 8) | 2: suffix = ".w2"; break;
1579	      case (4 << 8) | 0: suffix = ""; break;
1580	      default:
1581		/* Invalid entry.  */
1582		continue;
1583	      }
1584	    sprintf (&canon_reg_names[speci][regi][0],
1585		     "r%d%s", regi / 4, suffix);
1586	  }
1587    }
1588
1589  switch (GET_MODE_SIZE (GET_MODE (op)))
1590    {
1591    case 1: speci = 0; break;
1592    case 2: speci = 1; break;
1593    case 4: speci = 2; break;
1594    case 8: speci = 2; break; /* Existing GCC test cases are not using %F.  */
1595    default: gcc_unreachable ();
1596    }
1597  regi = REGNO (op);
1598  gcc_assert (regi < LAST_GP_REGNUM);
1599  gcc_assert (canon_reg_names[speci][regi][0]);
1600
1601  return &canon_reg_names[speci][regi][0];
1602}
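
/* A few examples of the mapping implemented above: hard register 5 in
   QImode is printed as "r1.b1", hard register 4 in HImode as "r1.w0", and
   hard register 8 in SImode as "r2", following the regi / 4 and regi % 4
   decomposition.  */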
1603
1604/* Print the operand OP to file stream FILE modified by LETTER.
1605   LETTER can be one of:
1606
     b: Print the register byte start (used by LBBO/SBBO).
     B: Print 'c' or 'b' for CTABLE or REG base in a memory address.
     F: Print the full 32-bit register.
     H: Print the higher 16 bits of a const_int operand.
     L: Print the lower 16 bits of a const_int operand.
     N: Print the next 32-bit register (the upper 32 bits of a 64-bit REG pair).
     P: Print the swapped condition.
     Q: Print the swapped and reversed condition.
     R: Print the reversed condition.
     S: Print the operand mode size (but do not print the operand itself).
     T: Print exact_log2 () for const_int operands.
     u: Print a QI constant integer as unsigned.  No transformation for regs.
     V: Print exact_log2 () of negated const_int operands.
     w: Print the lower 32 bits of a const_int operand.
     W: Print the upper 32 bits of a const_int operand.
     y: Print the next 8-bit register (regardless of op size).
     z: Print the second next 8-bit register (regardless of op size).
1624*/
1625static void
1626pru_print_operand (FILE *file, rtx op, int letter)
1627{
1628  switch (letter)
1629    {
1630    case 'S':
1631      fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1632      return;
1633
1634    default:
1635      break;
1636    }
1637
1638  if (comparison_operator (op, VOIDmode))
1639    {
1640      enum rtx_code cond = GET_CODE (op);
1641      gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1642
1643      switch (letter)
1644	{
1645	case 0:
1646	  fprintf (file, "%s", pru_comparison_str (cond));
1647	  return;
1648	case 'P':
1649	  fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1650	  return;
1651	case 'Q':
1652	  cond = swap_condition (cond);
1653	  /* Fall through to reverse.  */
1654	case 'R':
1655	  fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1656	  return;
1657	}
1658    }
1659
1660  switch (GET_CODE (op))
1661    {
1662    case REG:
1663      if (letter == 0 || letter == 'u')
1664	{
1665	  fprintf (file, "%s", pru_asm_regname (op));
1666	  return;
1667	}
1668      else if (letter == 'b')
1669	{
1670	  if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1671	    {
1672	      output_operand_lossage ("I/O register operand for '%%%c'",
1673				      letter);
1674	      return;
1675	    }
1676	  fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1677	  return;
1678	}
1679      else if (letter == 'F' || letter == 'N')
1680	{
1681	  if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1682	    {
1683	      output_operand_lossage ("I/O register operand for '%%%c'",
1684				      letter);
1685	      return;
1686	    }
1687	  if (REGNO (op) % 4 != 0)
1688	    {
1689	      output_operand_lossage ("non 32 bit register operand for '%%%c'",
1690				      letter);
1691	      return;
1692	    }
1693	  fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1694	  return;
1695	}
1696      else if (letter == 'y')
1697	{
1698	  if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1699	    {
1700	      output_operand_lossage ("invalid operand for '%%%c'", letter);
1701	      return;
1702	    }
1703	  fprintf (file, "%s", reg_names[REGNO (op) + 1]);
1704	  return;
1705	}
1706      else if (letter == 'z')
1707	{
1708	  if (REGNO (op) > LAST_NONIO_GP_REGNUM - 2)
1709	    {
1710	      output_operand_lossage ("invalid operand for '%%%c'", letter);
1711	      return;
1712	    }
1713	  fprintf (file, "%s", reg_names[REGNO (op) + 2]);
1714	  return;
1715	}
1716      break;
1717
1718    case CONST_INT:
1719      if (letter == 'H')
1720	{
1721	  HOST_WIDE_INT val = INTVAL (op);
1722	  val = (val >> 16) & 0xFFFF;
1723	  output_addr_const (file, gen_int_mode (val, SImode));
1724	  return;
1725	}
1726      else if (letter == 'L')
1727	{
1728	  HOST_WIDE_INT val = INTVAL (op);
1729	  val &= 0xFFFF;
1730	  output_addr_const (file, gen_int_mode (val, SImode));
1731	  return;
1732	}
1733      else if (letter == 'T')
1734	{
1735	  /* The predicate should have already validated the 1-high-bit
1736	     requirement.  Use CTZ here to deal with constant's sign
1737	     extension.  */
1738	  HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1739	  if (val < 0 || val > 31)
1740	    {
1741	      output_operand_lossage ("invalid operand for '%%%c'", letter);
1742	      return;
1743	    }
1744	  output_addr_const (file, gen_int_mode (val, SImode));
1745	  return;
1746	}
1747      else if (letter == 'V')
1748	{
1749	  HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1750	  if (val < 0 || val > 31)
1751	    {
1752	      output_operand_lossage ("invalid operand for '%%%c'", letter);
1753	      return;
1754	    }
1755	  output_addr_const (file, gen_int_mode (val, SImode));
1756	  return;
1757	}
1758      else if (letter == 'w')
1759	{
1760	  HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1761	  output_addr_const (file, gen_int_mode (val, SImode));
1762	  return;
1763	}
1764      else if (letter == 'W')
1765	{
1766	  HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1767	  output_addr_const (file, gen_int_mode (val, SImode));
1768	  return;
1769	}
1770      else if (letter == 'u')
1771	{
	  /* Work around GCC's representation of QI constants in
	     sign-extended form, and the PRU assembler's insistence on
	     unsigned constant integers.  See the notes about the O
	     constraint.  */
1775	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1776	  return;
1777	}
1778      /* Else, fall through.  */
1779
1780    case CONST:
1781    case LABEL_REF:
1782    case SYMBOL_REF:
1783      if (letter == 0)
1784	{
1785	  output_addr_const (file, op);
1786	  return;
1787	}
1788      break;
1789
1790    case CONST_FIXED:
1791	{
1792	  HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
1793	  if (letter != 0)
1794	    output_operand_lossage ("unsupported code '%c' for fixed-point:",
1795				    letter);
1796	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1797	  return;
1798	}
1799      break;
1800
1801    case CONST_DOUBLE:
1802      if (letter == 0)
1803	{
1804	  long val;
1805
1806	  if (GET_MODE (op) != SFmode)
1807	    {
1808	      output_operand_lossage ("double constants not supported");
1809	      return;
1810	    }
1811	  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1812	  fprintf (file, "0x%lx", val);
1813	  return;
1814	}
1815      else if (letter == 'w' || letter == 'W')
1816	{
1817	  long t[2];
1818	  REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1819	  fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1820	  return;
1821	}
1822      else
1823	{
1824	  output_operand_lossage ("invalid operand for '%%%c'", letter);
1825	  return;
1826	}
1827      break;
1828
1829    case SUBREG:
      /* Subregs should not appear at such a late stage.  */
1831      gcc_unreachable ();
1832      break;
1833
1834    case MEM:
1835      if (letter == 0)
1836	{
1837	  output_address (VOIDmode, op);
1838	  return;
1839	}
1840      else if (letter == 'B')
1841	{
1842	  rtx base = XEXP (op, 0);
1843	  if (GET_CODE (base) == PLUS)
1844	    {
1845	      rtx op0 = XEXP (base, 0);
1846	      rtx op1 = XEXP (base, 1);
1847
	      /* A PLUS cannot have two constant operands, so the first
		 operand must be a REG.  Hence it suffices to check
		 whether the second one is an exact CTABLE base
		 address.  */
1851	      if (ctable_base_operand (op1, VOIDmode))
1852		{
1853		  fprintf (file, "c");
1854		  return;
1855		}
1856	      else if (REG_P (op0))
1857		{
1858		  fprintf (file, "b");
1859		  return;
1860		}
1861	      else
1862		gcc_unreachable ();
1863	    }
1864	  else if (REG_P (base))
1865	    {
1866	      fprintf (file, "b");
1867	      return;
1868	    }
1869	  else if (ctable_addr_operand (base, VOIDmode))
1870	    {
1871	      fprintf (file, "c");
1872	      return;
1873	    }
1874	  else
1875	    gcc_unreachable ();
1876	}
1877      break;
1878
1879    case CODE_LABEL:
1880      if (letter == 0)
1881	{
1882	  output_addr_const (file, op);
1883	  return;
1884	}
1885      break;
1886
1887    default:
1888      break;
1889    }
1890
1891  output_operand_lossage ("unsupported operand %s for code '%c'",
1892			  GET_RTX_NAME (GET_CODE (op)), letter);
1893}
1894
1895/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
1896static void
1897pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1898{
1899  if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1900    {
1901      output_operand_lossage ("unexpected text address:");
1902      return;
1903    }
1904
1905  switch (GET_CODE (op))
1906    {
1907    case CONST:
1908    case LABEL_REF:
1909    case CONST_WIDE_INT:
1910    case SYMBOL_REF:
1911      break;
1912
1913    case CONST_INT:
1914      {
1915	unsigned HOST_WIDE_INT caddr = INTVAL (op);
1916	int base = pru_get_ctable_base_index (caddr);
1917	int offs = pru_get_ctable_base_offset (caddr);
1918	if (base < 0)
1919	  {
1920	    output_operand_lossage ("unsupported constant address:");
1921	    return;
1922	  }
1923	fprintf (file, "%d, %d", base, offs);
1924	return;
1925      }
1926      break;
1927
1928    case PLUS:
1929      {
1930	int base;
1931	rtx op0 = XEXP (op, 0);
1932	rtx op1 = XEXP (op, 1);
1933
1934	if (REG_P (op0) && CONST_INT_P (op1)
1935	    && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
1936	  {
1937	    base = pru_get_ctable_exact_base_index (INTVAL (op1));
1938	    fprintf (file, "%d, %s", base, pru_asm_regname (op0));
1939	    return;
1940	  }
1941	else if (REG_P (op1) && CONST_INT_P (op0)
1942		 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
1943	  {
1944	    /* Not a valid RTL.  */
1945	    gcc_unreachable ();
1946	  }
1947	else if (REG_P (op0) && CONSTANT_P (op1))
1948	  {
1949	    fprintf (file, "%s, ", pru_asm_regname (op0));
1950	    output_addr_const (file, op1);
1951	    return;
1952	  }
1953	else if (REG_P (op1) && CONSTANT_P (op0))
1954	  {
1955	    /* Not a valid RTL.  */
1956	    gcc_unreachable ();
1957	  }
1958	else if (REG_P (op1) && REG_P (op0))
1959	  {
1960	    fprintf (file, "%s, %s", pru_asm_regname (op0),
1961				     pru_asm_regname (op1));
1962	    return;
1963	  }
1964      }
1965      break;
1966
1967    case REG:
1968      fprintf (file, "%s, 0", pru_asm_regname (op));
1969      return;
1970
1971    case MEM:
1972      {
1973	rtx base = XEXP (op, 0);
1974	pru_print_operand_address (file, mode, base);
1975	return;
1976      }
1977    default:
1978      break;
1979    }
1980
1981  output_operand_lossage ("unsupported memory expression:");
1982}
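
/* Illustration of the address forms printed by pru_print_operand_address
   above (register names are hypothetical): a plain (reg) address prints as
   "r14, 0"; a (plus (reg) (const_int 16)) address prints as "r14, 16"; and
   a constant address that falls inside a CTABLE region prints as
   "<base index>, <offset>".  */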
1983
1984/* Implement TARGET_ASM_FUNCTION_PROLOGUE.  */
1985static void
1986pru_asm_function_prologue (FILE *file)
1987{
1988  if (flag_verbose_asm || flag_debug_asm)
1989    pru_dump_frame_layout (file);
1990}
1991
1992/* Implement `TARGET_ASM_INTEGER'.
1993   Target hook for assembling integer objects.  PRU version needs
1994   special handling for references to pmem.  Code copied from AVR.  */
1995
1996static bool
1997pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
1998{
1999  if (size == POINTER_SIZE / BITS_PER_UNIT
2000      && aligned_p
2001      && text_segment_operand (x, VOIDmode))
2002    {
2003      fputs ("\t.4byte\t%pmem(", asm_out_file);
2004      output_addr_const (asm_out_file, x);
2005      fputs (")\n", asm_out_file);
2006
2007      return true;
2008    }
2009  else if (size == INIT_ARRAY_ENTRY_BYTES
2010	   && aligned_p
2011	   && text_segment_operand (x, VOIDmode))
2012    {
2013      fputs ("\t.2byte\t%pmem(", asm_out_file);
2014      output_addr_const (asm_out_file, x);
2015      fputs (")\n", asm_out_file);
2016
2017      return true;
2018    }
2019  else
2020    {
2021      return default_assemble_integer (x, size, aligned_p);
2022    }
2023}
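
/* As an example of pru_assemble_integer above, a 4-byte, aligned
   initializer referencing a code symbol (symbol name hypothetical) is
   emitted as:

	.4byte	%pmem(some_func)

   and a 2-byte init_array entry referencing code is emitted with the
   .2byte variant; everything else goes through default_assemble_integer.  */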
2024
2025/* Implement TARGET_ASM_FILE_START.  */
2026
2027static void
2028pru_file_start (void)
2029{
2030  default_file_start ();
2031
  /* The compiler will take care of placing %label, so there is no
     need to confuse users with this warning.  */
2034  fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2035}
2036
2037/* Function argument related.  */
2038
2039/* Return the number of bytes needed for storing an argument with
2040   the given MODE and TYPE.  */
2041static int
2042pru_function_arg_size (machine_mode mode, const_tree type)
2043{
2044  HOST_WIDE_INT param_size;
2045
2046  if (mode == BLKmode)
2047    param_size = int_size_in_bytes (type);
2048  else
2049    param_size = GET_MODE_SIZE (mode);
2050
2051  /* Convert to words (round up).  */
2052  param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2053  gcc_assert (param_size >= 0);
2054
2055  return param_size;
2056}
2057
/* Check whether an argument with the given size must be
   passed/returned in a register.
2060
2061   Reference:
2062   https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029
2063
   Arguments of sizes other than 8/16/24/32/64 bits are passed on the stack.  */
2065static bool
2066pru_arg_in_reg_bysize (size_t sz)
2067{
2068  return sz == 1 || sz == 2 || sz == 3 || sz == 4 || sz == 8;
2069}
2070
2071/* Helper function to get the starting storage HW register for an argument,
2072   or -1 if it must be passed on stack.  The cum_v state is not changed.  */
2073static int
2074pru_function_arg_regi (cumulative_args_t cum_v,
2075		       machine_mode mode, const_tree type,
2076		       bool named)
2077{
2078  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2079  size_t argsize = pru_function_arg_size (mode, type);
2080  size_t i, bi;
2081  int regi = -1;
2082
2083  if (!pru_arg_in_reg_bysize (argsize))
2084    return -1;
2085
2086  if (!named)
2087    return -1;
2088
2089  /* Find the first available slot that fits.  Yes, that's the PRU ABI.  */
2090  for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2091    {
      /* VLAs and vector types are not defined in the PRU ABI.  Let's
	 handle them the same as their same-sized counterparts.  This way
	 we do not need to treat BLKmode differently; we need only check
	 the size.  */
2096      gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2097		  || argsize == 4 || argsize == 8);
2098
2099      /* Ensure SI and DI arguments are stored in full registers only.  */
2100      if ((argsize >= 4) && (i % 4) != 0)
2101	continue;
2102
2103      /* Structures with size 24 bits are passed starting at a full
2104	 register boundary.  */
2105      if (argsize == 3 && (i % 4) != 0)
2106	continue;
2107
2108      /* rX.w0/w1/w2 are OK.  But avoid spreading the second byte
2109	 into a different full register.  */
2110      if (argsize == 2 && (i % 4) == 3)
2111	continue;
2112
2113      for (bi = 0;
2114	   bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2115	   bi++)
2116	{
2117	  if (cum->regs_used[bi + i])
2118	    break;
2119	}
2120      if (bi == argsize)
2121	regi = FIRST_ARG_REGNUM + i;
2122    }
2123
2124  return regi;
2125}
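
/* An illustrative first-fit allocation under the rules above, assuming the
   PRU ABI's argument registers start at r14 (byte slot 0):

	void f (int a, char b, int c);

   'a' (4 bytes) occupies slots 0-3 (r14); 'b' (1 byte) occupies slot 4
   (r15.b0); 'c' (4 bytes) must start on a full-register boundary and
   slot 4 is already taken, so it occupies slots 8-11 (r16).  */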
2126
/* Mark in CUM_V that a function argument will occupy the HW register slots
   starting at REGI.  The number of consecutive 8-bit HW registers marked as
   occupied depends on the MODE and TYPE of the argument.  */
2130static void
2131pru_function_arg_regi_mark_slot (int regi,
2132				 cumulative_args_t cum_v,
2133				 machine_mode mode, const_tree type,
2134				 bool named)
2135{
2136  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2137  HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2138
2139  gcc_assert (named);
2140
2141  /* Mark all byte sub-registers occupied by argument as used.  */
2142  while (param_size--)
2143    {
2144      gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2145      gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2146      cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2147      regi++;
2148    }
2149}
2150
2151/* Define where to put the arguments to a function.  Value is zero to
2152   push the argument on the stack, or a hard register in which to
2153   store the argument.
2154
2155   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2156   the preceding args and about the function being called.
2157   ARG is a description of the argument.  */
2158
2159static rtx
2160pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
2161{
2162  rtx return_rtx = NULL_RTX;
2163  int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2164
2165  if (regi >= 0)
2166    return_rtx = gen_rtx_REG (arg.mode, regi);
2167
2168  return return_rtx;
2169}
2170
2171/* Implement TARGET_ARG_PARTIAL_BYTES.  PRU never splits any arguments
2172   between registers and memory, so we can return 0.  */
2173
2174static int
2175pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
2176{
2177  return 0;
2178}
2179
2180/* Update the data in CUM to advance over argument ARG.  */
2181
2182static void
2183pru_function_arg_advance (cumulative_args_t cum_v,
2184			  const function_arg_info &arg)
2185{
2186  int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2187
2188  if (regi >= 0)
2189    pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
2190				     arg.type, arg.named);
2191}
2192
2193/* Implement TARGET_FUNCTION_VALUE.  */
2194static rtx
2195pru_function_value (const_tree ret_type, const_tree fn ATTRIBUTE_UNUSED,
2196		      bool outgoing ATTRIBUTE_UNUSED)
2197{
2198  return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2199}
2200
2201/* Implement TARGET_LIBCALL_VALUE.  */
2202static rtx
2203pru_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
2204{
2205  return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2206}
2207
2208/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
2209static bool
2210pru_function_value_regno_p (const unsigned int regno)
2211{
2212  return regno == FIRST_RETVAL_REGNUM;
2213}
2214
2215/* Implement TARGET_RETURN_IN_MEMORY.  */
2216bool
2217pru_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2218{
2219  bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2220		    || int_size_in_bytes (type) == -1);
2221
2222  return in_memory;
2223}
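
/* For example, a 6-byte structure is returned in memory because 6 is not
   one of the register-eligible sizes (1/2/3/4/8 bytes), whereas an 8-byte
   structure is returned in registers.  */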
2224
2225/* Implement TARGET_CAN_USE_DOLOOP_P.  */
2226
2227static bool
2228pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2229		      unsigned int loop_depth, bool)
2230{
  /* Considering hardware limitations, use doloop only for innermost
     loops that are entered from the top.  */
2233  if (loop_depth > 1)
2234    return false;
  /* The PRU internal loop counter is 16 bits wide.  Remember that
     iterations_max holds the maximum number of loop latch executions,
     while the PRU LOOP instruction needs the count of loop body
     executions.  */
2238  if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2239    return false;
2240
2241  return true;
2242}
2243
/* Return NULL if INSN is valid within a low-overhead loop.
   Otherwise return a string describing why doloop cannot be applied.  */
2246
2247static const char *
2248pru_invalid_within_doloop (const rtx_insn *insn)
2249{
2250  if (CALL_P (insn))
2251    return "Function call in the loop.";
2252
2253  if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2254    return "Return from a call instruction in the loop.";
2255
2256  if (NONDEBUG_INSN_P (insn)
2257      && INSN_CODE (insn) < 0
2258      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2259	  || asm_noperands (PATTERN (insn)) >= 0))
2260    return "Loop contains asm statement.";
2261
2262  return NULL;
2263}
2264
2265
2266/* Figure out where to put LABEL, which is the label for a repeat loop.
2267   The loop ends just before LAST_INSN.  If SHARED, insns other than the
2268   "repeat" might use LABEL to jump to the loop's continuation point.
2269
2270   Return the last instruction in the adjusted loop.  */
2271
2272static rtx_insn *
2273pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2274			    bool shared)
2275{
2276  rtx_insn *next, *prev;
2277  int count = 0, code, icode;
2278
2279  if (dump_file)
2280    fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2281	     INSN_UID (last_insn));
2282
2283  /* Set PREV to the last insn in the loop.  */
2284  prev = PREV_INSN (last_insn);
2285
2286  /* Set NEXT to the next insn after the loop label.  */
2287  next = last_insn;
2288  if (!shared)
2289    while (prev != 0)
2290      {
2291	code = GET_CODE (prev);
2292	if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2293	  break;
2294
2295	if (INSN_P (prev))
2296	  {
2297	    if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2298	      prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2299
	    /* Other insns that must not end up among the last two
	       opcodes before the loop end.  */
2301	    icode = recog_memoized (prev);
2302	    if (icode < 0
2303		|| icode == CODE_FOR_pruloophi
2304		|| icode == CODE_FOR_pruloopsi)
2305	      break;
2306
2307	    count++;
2308	    next = prev;
2309	    if (dump_file)
2310	      print_rtl_single (dump_file, next);
2311	    if (count == 2)
2312	      break;
2313	  }
2314	prev = PREV_INSN (prev);
2315      }
2316
2317  /* Insert the nops.  */
2318  if (dump_file && count < 2)
2319    fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2320	     2 - count, count == 1 ? "" : "s");
2321
2322  for (; count < 2; count++)
2323    emit_insn_before (gen_nop (), last_insn);
2324
2325  /* Insert the label.  */
2326  emit_label_before (label, last_insn);
2327
2328  return last_insn;
2329}
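
/* Schematic illustration of pru_insert_loop_label_last (the insns shown
   are hypothetical): if only one countable insn precedes the loop end,
   one NOP is added so that the end label lands at least two instructions
   after the LOOP insn:

	LOOP	.L1, count
	    insn
	    nop			# inserted
	.L1:
 */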
2330
2331/* If IS_END is false, expand a canonical doloop_begin RTL into the
2332   PRU-specific doloop_begin_internal.  Otherwise expand doloop_end to
2333   doloop_end_internal.  */
2334void
2335pru_emit_doloop (rtx *operands, int is_end)
2336{
2337  rtx tag;
2338
2339  if (cfun->machine->doloop_tags == 0
2340      || cfun->machine->doloop_tag_from_end == is_end)
2341    {
2342      cfun->machine->doloop_tags++;
2343      cfun->machine->doloop_tag_from_end = is_end;
2344    }
2345
2346  tag = GEN_INT (cfun->machine->doloop_tags - 1);
2347  machine_mode opmode = GET_MODE (operands[0]);
2348  if (is_end)
2349    {
2350      if (opmode == HImode)
2351	emit_jump_insn (gen_doloop_end_internalhi (operands[0],
2352						   operands[1], tag));
2353      else if (opmode == SImode)
2354	emit_jump_insn (gen_doloop_end_internalsi (operands[0],
2355						   operands[1], tag));
2356      else
2357	gcc_unreachable ();
2358    }
2359  else
2360    {
2361      if (opmode == HImode)
2362	emit_insn (gen_doloop_begin_internalhi (operands[0], operands[0], tag));
2363      else if (opmode == SImode)
2364	emit_insn (gen_doloop_begin_internalsi (operands[0], operands[0], tag));
2365      else
2366	gcc_unreachable ();
2367    }
2368}
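
/* In the common case where each loop expands exactly one doloop_begin
   followed by one doloop_end, the tag bookkeeping above assigns tag 0 to
   the first loop's begin/end pair, tag 1 to the second, and so on; the
   tags let pru_reorg_loop later match the two halves of each loop.  */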
2369
2370
2371/* Code for converting doloop_begins and doloop_ends into valid
2372   PRU instructions.  Idea and code snippets borrowed from mep port.
2373
2374   A doloop_begin is just a placeholder:
2375
2376	$count = unspec ($count)
2377
2378   where $count is initially the number of iterations.
2379   doloop_end has the form:
2380
2381	if (--$count == 0) goto label
2382
2383   The counter variable is private to the doloop insns, nothing else
2384   relies on its value.
2385
2386   There are three cases, in decreasing order of preference:
2387
2388      1.  A loop has exactly one doloop_begin and one doloop_end.
2389	 The doloop_end branches to the first instruction after
2390	 the doloop_begin.
2391
2392	 In this case we can replace the doloop_begin with a LOOP
2393	 instruction and remove the doloop_end.  I.e.:
2394
2395		$count1 = unspec ($count1)
2396	    label:
2397		...
2398		if (--$count2 != 0) goto label
2399
2400	  becomes:
2401
2402		LOOP end_label,$count1
2403	    label:
2404		...
2405	    end_label:
2406		# end loop
2407
2408      2.  As for (1), except there are several doloop_ends.  One of them
2409	 (call it X) falls through to a label L.  All the others fall
2410	 through to branches to L.
2411
2412	 In this case, we remove X and replace the other doloop_ends
2413	 with branches to the LOOP label.  For example:
2414
2415		$count1 = unspec ($count1)
2416	    label:
2417		...
2418		if (--$count1 != 0) goto label
2419	    end_label:
2420		...
2421		if (--$count2 != 0) goto label
2422		goto end_label
2423
2424	 becomes:
2425
2426		LOOP end_label,$count1
2427	    label:
2428		...
2429	    end_label:
2430		# end repeat
2431		...
2432		goto end_label
2433
2434      3.  The fallback case.  Replace doloop_begins with:
2435
2436		$count = $count
2437
2438	 Replace doloop_ends with the equivalent of:
2439
2440		$count = $count - 1
2441		if ($count != 0) goto loop_label
2442
2443	 */
2444
2445/* A structure describing one doloop_begin.  */
2446struct pru_doloop_begin {
2447  /* The next doloop_begin with the same tag.  */
2448  struct pru_doloop_begin *next;
2449
2450  /* The instruction itself.  */
2451  rtx_insn *insn;
2452
2453  /* The initial counter value.  */
2454  rtx loop_count;
2455
2456  /* The counter register.  */
2457  rtx counter;
2458};
2459
2460/* A structure describing a doloop_end.  */
2461struct pru_doloop_end {
2462  /* The next doloop_end with the same loop tag.  */
2463  struct pru_doloop_end *next;
2464
2465  /* The instruction itself.  */
2466  rtx_insn *insn;
2467
2468  /* The first instruction after INSN when the branch isn't taken.  */
2469  rtx_insn *fallthrough;
2470
2471  /* The location of the counter value.  Since doloop_end_internal is a
2472     jump instruction, it has to allow the counter to be stored anywhere
2473     (any non-fixed register).  */
2474  rtx counter;
2475
2476  /* The target label (the place where the insn branches when the counter
2477     isn't zero).  */
2478  rtx label;
2479
2480  /* A scratch register.  Only available when COUNTER isn't stored
2481     in a general register.  */
2482  rtx scratch;
2483};
2484
2485
2486/* One do-while loop.  */
2487struct pru_doloop {
2488  /* All the doloop_begins for this loop (in no particular order).  */
2489  struct pru_doloop_begin *begin;
2490
2491  /* All the doloop_ends.  When there is more than one, arrange things
2492     so that the first one is the most likely to be X in case (2) above.  */
2493  struct pru_doloop_end *end;
2494};
2495
2496
/* Return true if LOOP can be converted to use the PRU LOOP instruction
   (that is, if it matches cases (1) or (2) above).  */
2499
2500static bool
2501pru_repeat_loop_p (struct pru_doloop *loop)
2502{
2503  struct pru_doloop_end *end;
2504  rtx_insn *fallthrough;
2505
2506  /* There must be exactly one doloop_begin and at least one doloop_end.  */
2507  if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2508    return false;
2509
2510  /* The first doloop_end (X) must branch back to the insn after
2511     the doloop_begin.  */
2512  if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2513    return false;
2514
  /* Check that the first doloop_end (X) can actually reach the
     doloop_begin with the U8_PCREL relocation used by the LOOP
     instruction.  */
2517  if (get_attr_length (loop->end->insn) != 4)
2518    return false;
2519
2520  /* All the other doloop_ends must branch to the same place as X.
2521     When the branch isn't taken, they must jump to the instruction
2522     after X.  */
2523  fallthrough = loop->end->fallthrough;
2524  for (end = loop->end->next; end != 0; end = end->next)
2525    if (end->label != loop->end->label
2526	|| !simplejump_p (end->fallthrough)
2527	|| fallthrough
2528	   != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2529      return false;
2530
2531  return true;
2532}
2533
2534
2535/* The main repeat reorg function.  See comment above for details.  */
2536
2537static void
2538pru_reorg_loop (rtx_insn *insns)
2539{
2540  rtx_insn *insn;
2541  struct pru_doloop *loops, *loop;
2542  struct pru_doloop_begin *begin;
2543  struct pru_doloop_end *end;
2544  size_t tmpsz;
2545
2546  /* Quick exit if we haven't created any loops.  */
2547  if (cfun->machine->doloop_tags == 0)
2548    return;
2549
2550  /* Create an array of pru_doloop structures.  */
2551  tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2552  loops = (struct pru_doloop *) alloca (tmpsz);
2553  memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2554
2555  /* Search the function for do-while insns and group them by loop tag.  */
2556  for (insn = insns; insn; insn = NEXT_INSN (insn))
2557    if (INSN_P (insn))
2558      switch (recog_memoized (insn))
2559	{
2560	case CODE_FOR_doloop_begin_internalhi:
2561	case CODE_FOR_doloop_begin_internalsi:
2562	  insn_extract (insn);
2563	  loop = &loops[INTVAL (recog_data.operand[2])];
2564
2565	  tmpsz = sizeof (struct pru_doloop_begin);
2566	  begin = (struct pru_doloop_begin *) alloca (tmpsz);
2567	  begin->next = loop->begin;
2568	  begin->insn = insn;
2569	  begin->loop_count = recog_data.operand[1];
2570	  begin->counter = recog_data.operand[0];
2571
2572	  loop->begin = begin;
2573	  break;
2574
2575	case CODE_FOR_doloop_end_internalhi:
2576	case CODE_FOR_doloop_end_internalsi:
2577	  insn_extract (insn);
2578	  loop = &loops[INTVAL (recog_data.operand[2])];
2579
2580	  tmpsz = sizeof (struct pru_doloop_end);
2581	  end = (struct pru_doloop_end *) alloca (tmpsz);
2582	  end->insn = insn;
2583	  end->fallthrough = next_real_insn (insn);
2584	  end->counter = recog_data.operand[0];
2585	  end->label = recog_data.operand[1];
2586	  end->scratch = recog_data.operand[3];
2587
2588	  /* If this insn falls through to an unconditional jump,
2589	     give it a lower priority than the others.  */
2590	  if (loop->end != 0 && simplejump_p (end->fallthrough))
2591	    {
2592	      end->next = loop->end->next;
2593	      loop->end->next = end;
2594	    }
2595	  else
2596	    {
2597	      end->next = loop->end;
2598	      loop->end = end;
2599	    }
2600	  break;
2601	}
2602
2603  /* Convert the insns for each loop in turn.  */
2604  for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2605    if (pru_repeat_loop_p (loop))
2606      {
2607	/* Case (1) or (2).  */
2608	rtx_code_label *repeat_label;
2609	rtx label_ref;
2610
2611	/* Create a new label for the repeat insn.  */
2612	repeat_label = gen_label_rtx ();
2613
	/* Replace the doloop_begin with a repeat.  We get rid of the
	   iteration register because the LOOP instruction will utilize
	   the internal LOOP register of the PRU core.  */
2617	label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2618	machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
2619	if (loop_mode == HImode)
2620	  emit_insn_before (gen_pruloophi (loop->begin->loop_count, label_ref),
2621			    loop->begin->insn);
2622	else if (loop_mode == SImode)
2623	  {
2624	    rtx loop_rtx = gen_pruloopsi (loop->begin->loop_count, label_ref);
2625	    emit_insn_before (loop_rtx, loop->begin->insn);
2626	  }
2627	else if (loop_mode == VOIDmode)
2628	  {
2629	    gcc_assert (CONST_INT_P (loop->begin->loop_count));
2630	    gcc_assert (UBYTE_INT ( INTVAL (loop->begin->loop_count)));
2631	    rtx loop_rtx = gen_pruloopsi (loop->begin->loop_count, label_ref);
2632	    emit_insn_before (loop_rtx, loop->begin->insn);
2633	  }
2634	else
2635	  gcc_unreachable ();
2636	delete_insn (loop->begin->insn);
2637
	/* Insert the repeat label before the first doloop_end.
	   Fill the gap with nops if the LOOP insn is less than 2
	   instructions away from loop->end.  */
2641	pru_insert_loop_label_last (loop->end->insn, repeat_label,
2642				    loop->end->next != 0);
2643
2644	/* Emit a pruloop_end (to improve the readability of the output).  */
2645	emit_insn_before (gen_pruloop_end (), loop->end->insn);
2646
	/* HACK: TODO: This is usually not needed, but is required for
	   a few rare cases where a JUMP that breaks out of the loop
	   references the LOOP_END address.  In other words, since
	   we're missing a real "loop_end" instruction, a loop "break"
	   may accidentally reference the loop end itself and thus
	   continue the cycle.  */
2653	for (insn = NEXT_INSN (loop->end->insn);
2654	     insn != next_real_insn (loop->end->insn);
2655	     insn = NEXT_INSN (insn))
2656	  {
2657	    if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2658	      emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2659	  }
2660
2661	/* Delete the first doloop_end.  */
2662	delete_insn (loop->end->insn);
2663
2664	/* Replace the others with branches to REPEAT_LABEL.  */
2665	for (end = loop->end->next; end != 0; end = end->next)
2666	  {
2667	    rtx_insn *newjmp;
2668	    newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2669	    JUMP_LABEL (newjmp) = repeat_label;
2670	    delete_insn (end->insn);
2671	    delete_insn (end->fallthrough);
2672	  }
2673      }
2674    else
2675      {
2676	/* Case (3).  First replace all the doloop_begins with setting
2677	   the HW register used for loop counter.  */
2678	for (begin = loop->begin; begin != 0; begin = begin->next)
2679	  {
2680	    insn = gen_move_insn (copy_rtx (begin->counter),
2681				  copy_rtx (begin->loop_count));
2682	    emit_insn_before (insn, begin->insn);
2683	    delete_insn (begin->insn);
2684	  }
2685
2686	/* Replace all the doloop_ends with decrement-and-branch sequences.  */
2687	for (end = loop->end; end != 0; end = end->next)
2688	  {
2689	    rtx reg;
2690
2691	    start_sequence ();
2692
2693	    /* Load the counter value into a general register.  */
2694	    reg = end->counter;
2695	    if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2696	      {
2697		reg = end->scratch;
2698		emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2699	      }
2700
2701	    /* Decrement the counter.  */
2702	    emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2703				      constm1_rtx));
2704
2705	    /* Copy it back to its original location.  */
2706	    if (reg != end->counter)
2707	      emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2708
2709	    /* Jump back to the start label.  */
2710	    insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2711							       const0_rtx),
2712						   reg,
2713						   const0_rtx,
2714						   end->label));
2715
2716	    JUMP_LABEL (insn) = end->label;
2717	    LABEL_NUSES (end->label)++;
2718
2719	    /* Emit the whole sequence before the doloop_end.  */
2720	    insn = get_insns ();
2721	    end_sequence ();
2722	    emit_insn_before (insn, end->insn);
2723
2724	    /* Delete the doloop_end.  */
2725	    delete_insn (end->insn);
2726	  }
2727      }
2728}
2729
2730/* Implement TARGET_MACHINE_DEPENDENT_REORG.  */
2731static void
2732pru_reorg (void)
2733{
2734  rtx_insn *insns = get_insns ();
2735
2736  compute_bb_for_insn ();
2737  df_analyze ();
2738
  /* We need correct insn lengths in order to decide whether a LOOP
     instruction can be emitted, given the U8_PCREL range limitation.  */
2741  shorten_branches (get_insns ());
2742
  /* The generic reorg_loops () is not suitable for PRU because it does
     not handle doloop_begin/end tying.  Also, we need our doloop_begin
     emitted before reload.  It is difficult to coalesce UBYTE constant
     initial loop values into the LOOP insn during the machine reorg
     phase.  */
2748  pru_reorg_loop (insns);
2749
2750  df_finish_pass (false);
2751}
2752
2753/* Enumerate all PRU-specific builtins.  */
2754enum pru_builtin
2755{
2756  PRU_BUILTIN_DELAY_CYCLES,
2757  PRU_BUILTIN_max
2758};
2759
2760static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2761
2762/* Implement TARGET_INIT_BUILTINS.  */
2763
2764static void
2765pru_init_builtins (void)
2766{
2767  tree void_ftype_longlong
2768    = build_function_type_list (void_type_node,
2769				long_long_integer_type_node,
2770				NULL);
2771
2772  pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2773    = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2774			    PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2775			    NULL_TREE);
2776}
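
/* Typical use of the builtin from C code (the cycle count here is
   arbitrary):

	__delay_cycles (1000);

   busy-waits for exactly 1000 PRU cycles.  The argument must be a
   compile-time constant; see pru_expand_delay_cycles below.  */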
2777
2778/* Implement TARGET_BUILTIN_DECL.  */
2779
2780static tree
2781pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
2782{
2783  switch (code)
2784    {
2785    case PRU_BUILTIN_DELAY_CYCLES:
2786      return pru_builtins[code];
2787    default:
2788      return error_mark_node;
2789    }
2790}
2791
2792/* Emit a sequence of one or more delay_cycles_X insns, in order to generate
2793   code that delays exactly ARG cycles.  */
2794
2795static rtx
2796pru_expand_delay_cycles (rtx arg)
2797{
2798  HOST_WIDE_INT c, n;
2799
2800  if (GET_CODE (arg) != CONST_INT)
2801    {
2802      error ("%<__delay_cycles%> only takes constant arguments");
2803      return NULL_RTX;
2804    }
2805
2806  c = INTVAL (arg);
2807
2808  gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
2809  if (c < 0)
2810    {
2811      error ("%<__delay_cycles%> only takes non-negative cycle counts");
2812      return NULL_RTX;
2813    }
2814
2815  emit_insn (gen_delay_cycles_start (arg));
2816
  /* For 32-bit delay loops, the cost is 2 + 2x cycles.  */
2818  if (c > 2 * 0xffff + 1)
2819    {
2820      n = (c - 2) / 2;
2821      c -= (n * 2) + 2;
2822      if ((unsigned long long) n > 0xffffffffULL)
2823	{
2824	  error ("%<__delay_cycles%> is limited to 32-bit loop counts");
2825	  return NULL_RTX;
2826	}
2827      emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
2828    }
2829
  /* For 16-bit delay loops, the cost is 1 + 2x cycles.  */
2831  if (c > 2)
2832    {
2833      n = (c - 1) / 2;
2834      c -= (n * 2) + 1;
2835
2836      emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
2837    }
2838
2839  while (c > 0)
2840    {
2841      emit_insn (gen_delay_cycles_1 ());
2842      c -= 1;
2843    }
2844
2845  emit_insn (gen_delay_cycles_end (arg));
2846
2847  return NULL_RTX;
2848}
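
/* Worked example for the expansion above (leaving aside the bracketing
   delay_cycles_start/end insns): for __delay_cycles (100) the 32-bit loop
   is skipped because 100 <= 2 * 0xffff + 1; the 16-bit loop gets
   n = (100 - 1) / 2 = 49 and burns 2 * 49 + 1 = 99 cycles; the single
   remaining cycle is covered by one delay_cycles_1 insn.  */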
2849
2850
2851/* Implement TARGET_EXPAND_BUILTIN.  Expand an expression EXP that calls
2852   a built-in function, with result going to TARGET if that's convenient
2853   (and in mode MODE if that's convenient).
2854   SUBTARGET may be used as the target for computing one of EXP's operands.
2855   IGNORE is nonzero if the value is to be ignored.  */
2856
2857static rtx
2858pru_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
2859		    rtx subtarget ATTRIBUTE_UNUSED,
2860		    machine_mode mode ATTRIBUTE_UNUSED,
2861		    int ignore ATTRIBUTE_UNUSED)
2862{
2863  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
2864  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
2865  rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
2866
2867  if (fcode == PRU_BUILTIN_DELAY_CYCLES)
2868    return pru_expand_delay_cycles (arg1);
2869
2870  internal_error ("bad builtin code");
2871
2872  return NULL_RTX;
2873}
2874
2875/* Remember the last target of pru_set_current_function.  */
2876static GTY(()) tree pru_previous_fndecl;
2877
2878/* Establish appropriate back-end context for processing the function
2879   FNDECL.  The argument might be NULL to indicate processing at top
2880   level, outside of any function scope.  */
2881static void
2882pru_set_current_function (tree fndecl)
2883{
2884  tree old_tree = (pru_previous_fndecl
2885		   ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
2886		   : NULL_TREE);
2887
2888  tree new_tree = (fndecl
2889		   ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
2890		   : NULL_TREE);
2891
2892  if (fndecl && fndecl != pru_previous_fndecl)
2893    {
2894      pru_previous_fndecl = fndecl;
2895      if (old_tree == new_tree)
2896	;
2897
2898      else if (new_tree)
2899	{
2900	  cl_target_option_restore (&global_options,
2901				    TREE_TARGET_OPTION (new_tree));
2902	  target_reinit ();
2903	}
2904
2905      else if (old_tree)
2906	{
2907	  struct cl_target_option *def
2908	    = TREE_TARGET_OPTION (target_option_current_node);
2909
2910	  cl_target_option_restore (&global_options, def);
2911	  target_reinit ();
2912	}
2913    }
2914}
2915
2916/* Implement TARGET_UNWIND_WORD_MODE.
2917
2918   Since PRU is really a 32-bit CPU, the default word_mode is not suitable.  */
2919static scalar_int_mode
2920pru_unwind_word_mode (void)
2921{
2922  return SImode;
2923}
2924
2925
2926/* Initialize the GCC target structure.  */
2927#undef TARGET_ASM_FUNCTION_PROLOGUE
2928#define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
2929#undef TARGET_ASM_INTEGER
2930#define TARGET_ASM_INTEGER pru_assemble_integer
2931
2932#undef TARGET_ASM_FILE_START
2933#define TARGET_ASM_FILE_START pru_file_start
2934
2935#undef TARGET_INIT_BUILTINS
2936#define TARGET_INIT_BUILTINS pru_init_builtins
2937#undef TARGET_EXPAND_BUILTIN
2938#define TARGET_EXPAND_BUILTIN pru_expand_builtin
2939#undef TARGET_BUILTIN_DECL
2940#define TARGET_BUILTIN_DECL pru_builtin_decl
2941
2942#undef TARGET_COMPUTE_FRAME_LAYOUT
2943#define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
2944
2945#undef TARGET_FUNCTION_OK_FOR_SIBCALL
2946#define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
2947
2948#undef TARGET_CAN_ELIMINATE
2949#define TARGET_CAN_ELIMINATE pru_can_eliminate
2950
2951#undef TARGET_HARD_REGNO_MODE_OK
2952#define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
2953
2954#undef  TARGET_HARD_REGNO_SCRATCH_OK
2955#define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
2956#undef  TARGET_HARD_REGNO_CALL_PART_CLOBBERED
2957#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
2958  pru_hard_regno_call_part_clobbered
2959
2960#undef TARGET_FUNCTION_ARG
2961#define TARGET_FUNCTION_ARG pru_function_arg
2962
2963#undef TARGET_FUNCTION_ARG_ADVANCE
2964#define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
2965
2966#undef TARGET_ARG_PARTIAL_BYTES
2967#define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
2968
2969#undef TARGET_FUNCTION_VALUE
2970#define TARGET_FUNCTION_VALUE pru_function_value
2971
2972#undef TARGET_LIBCALL_VALUE
2973#define TARGET_LIBCALL_VALUE pru_libcall_value
2974
2975#undef TARGET_FUNCTION_VALUE_REGNO_P
2976#define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
2977
2978#undef TARGET_RETURN_IN_MEMORY
2979#define TARGET_RETURN_IN_MEMORY pru_return_in_memory
2980
2981#undef TARGET_MUST_PASS_IN_STACK
2982#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
2983
2984#undef TARGET_LEGITIMATE_ADDRESS_P
2985#define TARGET_LEGITIMATE_ADDRESS_P pru_legitimate_address_p
2986
2987#undef TARGET_INIT_LIBFUNCS
2988#define TARGET_INIT_LIBFUNCS pru_init_libfuncs
2989#undef TARGET_LIBFUNC_GNU_PREFIX
2990#define TARGET_LIBFUNC_GNU_PREFIX true
2991
2992#undef TARGET_RTX_COSTS
2993#define TARGET_RTX_COSTS pru_rtx_costs
2994
2995#undef TARGET_PRINT_OPERAND
2996#define TARGET_PRINT_OPERAND pru_print_operand
2997
2998#undef TARGET_PRINT_OPERAND_ADDRESS
2999#define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
3000
3001#undef TARGET_OPTION_OVERRIDE
3002#define TARGET_OPTION_OVERRIDE pru_option_override
3003
3004#undef TARGET_SET_CURRENT_FUNCTION
3005#define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3006
3007#undef  TARGET_MACHINE_DEPENDENT_REORG
3008#define TARGET_MACHINE_DEPENDENT_REORG  pru_reorg
3009
3010#undef  TARGET_CAN_USE_DOLOOP_P
3011#define TARGET_CAN_USE_DOLOOP_P		pru_can_use_doloop_p
3012
3013#undef TARGET_INVALID_WITHIN_DOLOOP
3014#define TARGET_INVALID_WITHIN_DOLOOP  pru_invalid_within_doloop
3015
3016#undef  TARGET_UNWIND_WORD_MODE
3017#define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3018
3019#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3020#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3021
3022struct gcc_target targetm = TARGET_INITIALIZER;
3023
3024#include "gt-pru.h"
3025