/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "tm_p.h"
#include "emit-rtl.h"
#include "explow.h"
#include "expr.h"

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
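
/* For reference, a machine-description expander would typically invoke
   expand_block_move along these lines (a sketch; the exact pattern name
   and operand predicates live in sh.md):

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;  */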

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  bool constp = CONST_INT_P (operands[2]);
  int bytes = constp ? INTVAL (operands[2]) : 0;

  if (! constp)
    return false;
  /* If mov.l can be used for the word stores (i.e. the destination is
     word-aligned), movua.l can do the unaligned loads and we still get
     a relatively short and efficient inline sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, RETURN_BEGIN);

      return true;
    }

  /* The size is known to be constant at this point.  Fail if the block
     doesn't have 4 byte alignment or the size isn't a multiple of 4
     bytes.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else if (! optimize_size)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
						    ? "__movmem_i4_odd"
						    : "__movmem_i4_even",
				     SFUNC_STATIC).lab;
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  int dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
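	  /* E.g. for bytes == 20, bytes & 4 selected __movmem_i4_odd
	     above and r6 is set to (20 >> 3) - 1 == 1.  */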
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
	  return true;
	}
      else
	return false;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This uses the same number of bytes as a memcpy call, but to a
     different, less common function name, so it will occasionally use
     more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  E.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
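      /* Sanity check with the 72 byte example above: bytes / 4 == 18,
	 so final_switch == 16 - 18 % 16 == 14 and while_loop == 0,
	 i.e. r6 == 14.  */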
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}

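/* Probabilities, as REG_BR_PROB note values, attached below to the
   emitted conditional branches.  */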
static const int prob_unlikely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 10)
    .to_reg_br_prob_note ();
static const int prob_likely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 4)
    .to_reg_br_prob_note ();

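/* A rough C-level sketch of what sh_expand_cmpstr below emits (hedged;
   the real sequence uses the SH cmp/str instruction for the zero byte
   test and preloads the next word of S2 before branching):

     if (((uintptr_t) s1 | (uintptr_t) s2) & 3)
       goto byte_loop;
     do {
       w2 = *(const uint32_t *) s2;  s2 += 4;
       w1 = *(const uint32_t *) s1;  s1 += 4;
       if (some_byte_is_zero (w1 & w2))	   // cmp/str against zero
	 { s1 -= 4;  s2 -= 4;  goto byte_loop; }
     } while (w1 == w2);
     return bswap (w1) - bswap (w2);	   // bswap on little endian only
   byte_loop:
     for (;;) {
       c2 = *s2++;  c1 = *s1++;
       if (c2 == 0 || c1 != c2)
	 return (unsigned char) c1 - (unsigned char) c2;
     }
*/
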
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp0 must hold zero for the cmp/str zero byte tests below; the
     pseudo would otherwise be used uninitialized.  */
  emit_move_insn (tmp0, const0_rtx);

  /* addr2 is word-aligned here, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* Start the long word loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* addr1 is word-aligned here, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* addr2 is word-aligned here, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the words differ, so subtract them.  On little endian
     targets, byte-swap both words first so that the difference is
     ordered like memcmp.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

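      /* Each rot8/rot16/rot8 triple below reverses the four bytes of a
	 word: swap the low byte pair, swap the half-words, then swap
	 the new low byte pair.  */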
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* Start the byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
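  /* With delayed branches, the zero-extension is emitted before the
     branch so that it can be scheduled into the delay slot; otherwise
     it is done once after the loop at L_end_loop_byte.  */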
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}

/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  bool constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* For a small constant length, expand inline word and byte loops.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int witers = bytes / 4;
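      /* E.g. bytes == 11 gives witers == 2: the word loop below runs
	 twice (8 bytes) and the remaining sbytes == 3 tail bytes are
	 unrolled.  */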

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  if (addr1_alignment < 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment < 4 && addr2_alignment >= 4)
	    {
	      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }
	  else if (addr1_alignment >= 4 && addr2_alignment < 4)
	    {
	      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* Word count.  Do we have iterations?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* Start the long word loop.  */
	  emit_label (L_loop_long);

	  /* addr2 is word-aligned here, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* addr1 is word-aligned here, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached the maximum number of iterations.  */
	  if (sbytes == 0)
	    {
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Check the remaining bytes.  */

	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found the last word.  Rescan it byte by byte.  */

	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* Fall through.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}

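/* A rough C-level sketch of what sh_expand_strlen below emits (hedged;
   the real word loop tests four bytes at once with cmp/str against the
   search character, and the byte paths assume that character is 0):

     end = s;
     if (((uintptr_t) end & 3) == 0)
       {
	 while (!some_byte_is_zero (*(const uint32_t *) end))
	   end += 4;
	 end -= 4;	// back up and rescan the hit word bytewise
       }
     while (*end++ != 0)
       ;
     return end - (s + 1);
*/
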
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* Remember the start of the string.  */
  emit_move_insn (start_addr, current_addr);

  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* Start the long word loop.  */
  emit_label (L_loop_long);

  /* addr1 is word-aligned here, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* Unroll the remaining bytes of the hit word.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* Start the byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

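  /* current_addr points one past the terminating zero byte here, so the
     length is current_addr - (start_addr + 1).  */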
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size.
   OPERANDS[2] is the value to fill with.
   OPERANDS[3] is the alignment.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

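  /* Only 0 and -1 are handled by the word loop: they are the byte
     values whose SImode register image already consists of four
     identical bytes, so VAL can be stored word-wise as-is.  */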
  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1)
      && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
	{
	  emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
	  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	}

      /* Word count.  Do we have iterations?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
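      /* E.g. count == 11 gives lenw == 2: the word loop stores 8 bytes
	 and the unrolled tail below stores the remaining
	 count % 4 == 3 bytes.  */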

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);

      /* Start the word loop.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
	emit_insn (gen_dect (lenw, lenw));
      else
	{
	  emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	  emit_insn (gen_tstsi_t (lenw, lenw));
	}

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						GET_MODE_SIZE (SImode)));

      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
	{
	  emit_move_insn (dest, val);
	  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
						    GET_MODE_SIZE (QImode)));
	}

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* Start the byte loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
					    GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}