/* Helper routines for memory move and comparison insns.
   Copyright (C) 2013-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "tm_p.h"
#include "emit-rtl.h"
#include "explow.h"
#include "expr.h"

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of
         pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
        {
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, BLKmode,
                                                src_addr, copied);

          set_mem_size (from, 4);
          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
          emit_move_insn (to, temp);
          copied += 4;
        }

      if (copied < bytes)
        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                                                   src_addr, copied),
                        bytes - copied, align, RETURN_BEGIN);

      return true;
    }
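
  /* Illustrative only: a caller along the lines of

         void f (int *d, const char *s) { __builtin_memcpy (d, s, 16); }

     has a word-aligned destination but an only byte-aligned source, so
     with -m4a (and assuming can_move_by_pieces accepts the copy) it would
     be expected to take the movua.l path above rather than the library
     helpers below.  */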

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return false;
      else if (bytes == 12)
        {
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);

          rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
                                     SFUNC_STATIC).lab;
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
          return true;
        }
      else if (! optimize_size)
        {
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);
          rtx r6 = gen_rtx_REG (SImode, 6);

          rtx lab = function_symbol (func_addr_rtx, bytes & 4
                                                    ? "__movmem_i4_odd"
                                                    : "__movmem_i4_even",
                                     SFUNC_STATIC).lab;
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          int dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
          return true;
        }
      else
        return false;
    }

  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx, lab));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */
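      /* Illustrative check of that encoding (comment only): a 136 byte
         move has bytes / 4 = 34 words, so final_switch = 16 - (34 % 16) = 14
         and while_loop = (34 / 16 - 1) * 16 = 16, giving r6 = 30.  The
         first 64 bytes bring r6 down to 14 (loop again), the next 64 bytes
         bring it to -2, and the -2 switch entry moves the remaining
         8 bytes.  */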

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
      return true;
    }

  return false;
}

static const int prob_unlikely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 10)
    .to_reg_br_prob_note ();
static const int prob_likely
  = profile_probability::from_reg_br_prob_base (REG_BR_PROB_BASE / 4)
    .to_reg_br_prob_note ();

/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  if (addr1_alignment < 4 && addr2_alignment < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment < 4 && addr2_alignment >= 4)
    {
      emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }
  else if (addr1_alignment >= 4 && addr2_alignment < 4)
    {
      emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* Start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  /* Fall through: subtract words.  */
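  /* On little-endian targets the bytes of each loaded word are in the
     reverse of string order, so byte-swap tmp1 and tmp2 before the final
     word subtraction: the two 8-bit rotates of the low halves together
     with the 16-bit rotate of the full words reverse all four bytes,
     making the byte that comes first in memory the most significant.  */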
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* Start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}

/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);

  rtx_insn *jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = copy_to_mode_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);
  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;

  const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
  const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;

  /* Loop on a register count.  */
  if (constp && bytes >= 0 && bytes < 32)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);

      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int witers = bytes / 4;

      if (witers > 1)
        {
          addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
          addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

          emit_move_insn (tmp0, const0_rtx);

          if (addr1_alignment < 4 && addr2_alignment < 4)
            {
              emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
              emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
              jump = emit_jump_insn (gen_branch_false (L_loop_byte));
              add_int_reg_note (jump, REG_BR_PROB, prob_likely);
            }
          else if (addr1_alignment < 4 && addr2_alignment >= 4)
            {
              emit_insn (gen_tstsi_t (s1_addr, GEN_INT (3)));
              jump = emit_jump_insn (gen_branch_false (L_loop_byte));
              add_int_reg_note (jump, REG_BR_PROB, prob_likely);
            }
          else if (addr1_alignment >= 4 && addr2_alignment < 4)
            {
              emit_insn (gen_tstsi_t (s2_addr, GEN_INT (3)));
              jump = emit_jump_insn (gen_branch_false (L_loop_byte));
              add_int_reg_note (jump, REG_BR_PROB, prob_likely);
            }

          /* Word count.  Do we have iterations?  */
          emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

          /* Start long loop.  */
          emit_label (L_loop_long);

          /* tmp2 is aligned, OK to load.  */
          emit_move_insn (tmp2, addr2);
          emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
                                                  GET_MODE_SIZE (SImode)));

          /* tmp1 is aligned, OK to load.  */
          emit_move_insn (tmp1, addr1);
          emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
                                                  GET_MODE_SIZE (SImode)));

          /* Is there a 0 byte?  */
          emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

          emit_insn (gen_cmpstr_t (tmp0, tmp3));
          jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
          add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

          emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
          jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
          add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
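
          /* dt (SH2 and later) decrements lenw and sets T exactly when it
             reaches zero; the add/tst pair below is the SH1 equivalent.
             Either way the loop repeats while T is still clear.  */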
          if (TARGET_SH2)
            emit_insn (gen_dect (lenw, lenw));
          else
            {
              emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
              emit_insn (gen_tstsi_t (lenw, lenw));
            }

          jump = emit_jump_insn (gen_branch_false (L_loop_long));
          add_int_reg_note (jump, REG_BR_PROB, prob_likely);

          int sbytes = bytes % 4;

          /* End loop.  Reached max iterations.  */
          if (sbytes == 0)
            {
              emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
              jump = emit_jump_insn (gen_jump_compact (L_return));
              emit_barrier_after (jump);
            }
          else
            {
              /* Remaining bytes to check.  */

              addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
              addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

              while (sbytes--)
                {
                  emit_insn (gen_extendqisi2 (tmp1, addr1));
                  emit_insn (gen_extendqisi2 (tmp2, addr2));

                  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
                  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
                  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

                  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
                  if (flag_delayed_branch)
                    emit_insn (gen_zero_extendqisi2 (tmp2,
                                                     gen_lowpart (QImode,
                                                                  tmp2)));
                  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
                  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

                  addr1 = adjust_address (addr1, QImode,
                                          GET_MODE_SIZE (QImode));
                  addr2 = adjust_address (addr2, QImode,
                                          GET_MODE_SIZE (QImode));
                }

              jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
              emit_barrier_after (jump);
            }

          emit_label (L_end_loop_long);

          /* Found last word.  Restart it byte by byte.  */

          emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
                                                  -GET_MODE_SIZE (SImode)));
          emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
                                                  -GET_MODE_SIZE (SImode)));

          /* Fall through.  */
        }

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      while (bytes--)
        {
          emit_insn (gen_extendqisi2 (tmp1, addr1));
          emit_insn (gen_extendqisi2 (tmp2, addr2));

          emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
          jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
          add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

          emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
          if (flag_delayed_branch)
            emit_insn (gen_zero_extendqisi2 (tmp2,
                                             gen_lowpart (QImode, tmp2)));
          jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
          add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

          addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
          addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
        }

      jump = emit_jump_insn (gen_jump_compact (L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}

/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx_insn *jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);
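
  /* Overview: once the pointer is known to be word aligned, the string is
     scanned a word at a time; cmp/str (gen_cmpstr_t) sets T as soon as
     some byte of the loaded word matches the search character (normally
     0), and the last word is then re-scanned byte by byte to find the
     exact position.  */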

  emit_move_insn (operands[0], GEN_INT (-1));

  /* Remember the start of the string.  */
  emit_move_insn (start_addr, current_addr);

  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* Start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte?  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* End loop.  */

  emit_label (L_end_loop_long);

  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* Unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_barrier_after (jump);

  /* Start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* End loop.  */

  emit_label (L_return);

  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}

/* Emit code to perform a memset.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the size.
   OPERANDS[2] is the value to fill with.
   OPERANDS[3] is the alignment.  */
void
sh_expand_setmem (rtx *operands)
{
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_word = gen_label_rtx ();
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_insn *jump;
  rtx dest = copy_rtx (operands[0]);
  rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  rtx val = copy_to_mode_reg (SImode, operands[2]);
  int align = INTVAL (operands[3]);
  rtx len = copy_to_mode_reg (SImode, operands[1]);

  if (! CONST_INT_P (operands[1]))
    return;

  int count = INTVAL (operands[1]);

  if (CONST_INT_P (operands[2])
      && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1)
      && count > 8)
    {
      rtx lenw = gen_reg_rtx (SImode);

      if (align < 4)
        {
          emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
          jump = emit_jump_insn (gen_branch_false (L_loop_byte));
          add_int_reg_note (jump, REG_BR_PROB, prob_likely);
        }

      /* Word count.  Do we have iterations?  */
      emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

      dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
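
      /* Worked example (comment only): clearing 10 bytes at a 4-byte
         aligned destination gives lenw = 2 at run time, so the word loop
         below stores two SImode zeros, the count % 4 = 2 tail bytes are
         stored by the unrolled byte moves after it, and the jump to
         L_return skips the byte loop entirely.  */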

      /* Start loop.  */
      emit_label (L_loop_word);

      if (TARGET_SH2)
        emit_insn (gen_dect (lenw, lenw));
      else
        {
          emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
          emit_insn (gen_tstsi_t (lenw, lenw));
        }

      emit_move_insn (dest, val);
      emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
                                                GET_MODE_SIZE (SImode)));

      jump = emit_jump_insn (gen_branch_false (L_loop_word));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
      count = count % 4;

      dest = adjust_address (dest, QImode, 0);

      val = gen_lowpart (QImode, val);

      while (count--)
        {
          emit_move_insn (dest, val);
          emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
                                                    GET_MODE_SIZE (QImode)));
        }

      jump = emit_jump_insn (gen_jump_compact (L_return));
      emit_barrier_after (jump);
    }

  dest = adjust_automodify_address (dest, QImode, dest_addr, 0);

  /* Start loop.  */
  emit_label (L_loop_byte);

  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  val = gen_lowpart (QImode, val);
  emit_move_insn (dest, val);
  emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
                                            GET_MODE_SIZE (QImode)));

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  emit_label (L_return);
}