/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * Copy 16 bytes from r4 to r6 using four word loads/stores.  Both
 * pointers carry a -4 bias (they point 4 bytes before the next datum),
 * so transfers use offsets 4..16 and the final update forms
 * (lwzu/stwu) leave each pointer advanced by 16 with the bias kept.
 * Clobbers r7-r10.
 */
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

/*
 * As COPY_16_BYTES, but each load/store carries a numeric local label
 * (8<n>0 .. 8<n>7) so that a fault on any user access can be routed,
 * via the __ex_table entries emitted by COPY_16_BYTES_EXCODE(n), to
 * the matching fixup stub.
 */
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

/*
 * Fixup code for COPY_16_BYTES_WITHEX(n): 9<n>0 handles a fault in the
 * load half (read fault), 9<n>1 a fault in the store half (write
 * fault).  Each subtracts the 16*n bytes already copied by earlier
 * macro expansions in the cacheline loop from r5 (bytes remaining in
 * the tail) and branches to the common read (104) / write (105) fault
 * handlers in __copy_tofrom_user.
 */
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/ppc/lib/",N_SO,0,0,0f
	.stabs	"string.S",N_SO,0,0,0f

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * char *strcpy(char *dest, const char *src)
 * In: r3 = dest, r4 = src.  Out: r3 = dest (never written).
 * Copies bytes including the terminating NUL, using the
 * pre-increment forms (lbzu/stbu) on -1-biased pointers.
 */
_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/* This clears out any unused part of the destination buffer,
   just as the libc version does.  -- paulus */
/*
 * char *strncpy(char *dest, const char *src, size_t n)
 * In: r3 = dest, r4 = src, r5 = n.  Out: r3 = dest (never written).
 */
_GLOBAL(strncpy)
	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	bnelr			/* if we didn't hit a null char, we're done */
	mfctr	r5
	cmpwi	0,r5,0		/* any space left in destination buffer? */
	beqlr			/* we know r0 == 0 here */
2:	stbu	r0,1(r6)	/* clear it out if so */
	bdnz	2b
	blr

/*
 * char *strcat(char *dest, const char *src)
 * In: r3 = dest, r4 = src.  Out: r3 = dest (never written).
 * First scans dest for its NUL, then appends src including its NUL.
 */
_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1	/* back up so the NUL gets overwritten */
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/*
 * int strcmp(const char *s1, const char *s2)
 * In: r3 = s1, r4 = s2.
 * Out: r3 = difference of the first differing bytes (0 if equal).
 * cr1 tracks whether the s1 byte was NUL so the loop also stops at
 * end of string.
 */
_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	beqlr	1		/* s1 ended: r3 already holds the result */
	beq	1b
	blr

/*
 * int strlen(const char *s)
 * In: r3 = s.  Out: r3 = length, not counting the NUL.
 */
_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4
	blr

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * In: r3 = dest, r4 = byte count.
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4		/* r5 = count; r4 becomes the zero value */
	li	r4,0
	addi	r6,r3,-4	/* -4 bias for the stwu forms below */
	cmplwi	0,r5,4
	blt	7f		/* fewer than 4 bytes: byte loop only */
	stwu	r4,4(r6)	/* store one word (dest may be unaligned) */
	beqlr
	andi.	r0,r6,3		/* word-align r6; the word just stored */
	add	r5,r0,r5	/* already covers the skipped bytes */
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES	/* offset within cacheline */
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2		/* words needed to reach the line boundary */
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9		/* now clear whole lines at a time */
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6		/* zero an entire cacheline in one go */
#else
	/* 8xx: no dcbz here, store the line out longhand */
10:	stw	r4, 4(r6)
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
	/* NOTE(review): the rest of this file sizes lines with
	   CACHELINE_BYTES/L1_CACHE_BYTES; confirm CACHE_LINE_SIZE is
	   actually defined in 8xx builds -- an undefined macro
	   evaluates as 0 in #if and these stores would be skipped. */
	stw	r4, 20(r6)
	stw	r4, 24(r6)
	stw	r4, 28(r6)
	stw	r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES	/* bytes left after the lines */
	addi	r5,r5,4
2:	srwi	r0,r5,2		/* trailing whole words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3		/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * void *memset(void *s, int c, size_t count)
 * In: r3 = s, r4 = c, r5 = count.  Out: r3 = s (never written).
 */
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23	/* replicate the low byte of c ... */
	rlwimi	r4,r4,16,0,15	/* ... into all four bytes of r4 */
	addi	r6,r3,-4	/* -4 bias for the stwu forms below */
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)	/* store one word, then word-align r6 */
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2		/* whole words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3		/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * In: r3 = dest, r4 = src, r5 = count.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4		/* -4 bias for the lwzu/stwu forms */
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6		/* claim the dest line without reading it */
#endif
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2		/* trailing words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3		/* trailing bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

/*
 * void *memmove(void *dest, const void *src, size_t n)
 * Chooses a copy direction that is safe for overlapping regions.
 */
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

/*
 * void *memcpy(void *dest, const void *src, size_t n)
 * In: r3 = dest, r4 = src, r5 = n.  Out: r3 = dest (never written).
 * Copies forwards, 8 bytes per iteration once dest is word-aligned.
 */
_GLOBAL(memcpy)
	srwi.	r7,r5,3		/* r7 = number of 8-byte chunks */
	addi	r6,r3,-4	/* -4 bias for the lwzu/stwu forms */
	addi	r4,r4,-4
	beq	2f		/* if less than 8 bytes to do */
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7		/* then the remaining 0-7 bytes */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4		/* copy 1-3 bytes to align dest, then */
	mtctr	r0		/* recompute the chunk count and retry */
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

/*
 * Copy r5 bytes from r4 to r3 working down from the top; used by
 * memmove when dest > src so overlapping regions are preserved.
 */
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5		/* point both just past the ends */
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7		/* then the remaining 0-7 bytes */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0		/* align dest downwards by r0 bytes first */
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 * Out: r3 = difference of the first differing bytes, or 0.
 * A count <= 0 compares equal.
 */
_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	bdnzt	2,1b		/* loop while ctr != 0 && bytes equal */
	blr
2:	li	r3,0
	blr

/*
 * void *memchr(const void *s, int c, size_t n)
 * Out: r3 = address of the first byte equal to c, or 0 if there is
 * none (or if n <= 0).
 */
_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b		/* loop while ctr != 0 && no match */
	beqlr			/* found: r3 points at the byte */
2:	li	r3,0
	blr

/*
 * unsigned long __copy_tofrom_user(void *to, const void *from,
 *				    unsigned long size)
 * In: r3 = to, r4 = from, r5 = size.
 * Out: r3 = number of bytes NOT copied (0 on complete success).
 * Every user access is paired, via __ex_table, with fixup code below
 * that computes the residual count; after a read fault the remainder
 * of the destination is cleared.  Structure mirrors cacheable_memcpy:
 * byte/word alignment preamble, dcbz + prefetch cacheline loop, then
 * word and byte tails.
 */
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4	/* -4 bias for the lwzu/stwu forms */
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
	li	r0,0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0		/* r7 = prefetch depth in cachelines */
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0	/* copy r8 lines with prefetch running; */
	mr	r0,r7		/* r0 = r7 lines left for the drain pass */
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0		/* drain pass still to do? */
	li	r3,4
	li	r7,0
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2		/* trailing words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3		/* trailing bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/*
 * Fault fixups.  Convention: r9 = 0 for a read (source) fault,
 * 1 for a write (destination) fault; r3 = log2 of the unit the
 * faulting loop copied per iteration, so that the residual byte
 * count is r5 + (ctr << r3).
 */
/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8	/* lines left = drain count + ctr */
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5	/* r3 = bytes not copied */
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text

/*
 * unsigned long __clear_user(void *addr, unsigned long size)
 * In: r3 = user address, r4 = size.
 * Out: r3 = number of bytes NOT cleared (0 on success).
 * Each faulting store is mapped below to a fixup (90/91/92) that
 * reconstructs the residual count.
 */
_GLOBAL(__clear_user)
	addi	r6,r3,-4	/* -4 bias for the stwu forms */
	li	r3,0		/* default return: success */
	li	r5,0
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3		/* r4 = trailing byte count */
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
/* fault on the initial word: nothing cleared yet */
90:	mr	r3,r4
	blr
/* fault in the word loop: residual = 4*ctr + trailing bytes */
91:	mfctr	r3
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
/* fault in the byte loop: residual = ctr */
92:	mfctr	r3
	blr

	.section __ex_table,"a"
	.align	2
	.long	11b,90b
	.long	1b,91b
	.long	8b,92b
	.text

/*
 * long __strncpy_from_user(char *dst, const char *src, long count)
 * In: r3 = dst (kernel), r4 = src (user), r5 = count.
 * Out: r3 = length of the string copied (excluding the NUL), or
 * -EFAULT if a user access faulted.  Unlike strncpy, this does not
 * NUL-pad the rest of the buffer.
 */
_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f		/* hit the NUL: don't count it */
2:	addi	r6,r6,1
3:	subf	r3,r3,r6
	blr
99:	li	r3,-EFAULT
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b
	.text

/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
/*
 * Out: r3 = number of bytes up to and including the NUL, len + 1 if
 * no NUL was found within len bytes, or 0 on a bad address.  The
 * scan never goes past 'top'.
 */
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5	/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6		/* ctr = min(len, top+1 - str) */
1:	lbzu	r0,1(r7)	/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7	/* number of bytes we have looked at */
	beqlr			/* return if we found a 0 byte */
	cmpw	0,r3,r4		/* did we look at all len bytes? */
	blt	99f		/* if not, must have hit top */
	addi	r3,r4,1		/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0		/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b