/* bcopyinout_xscale.S revision 275767 */
/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: stable/10/sys/arm/arm/bcopyinout_xscale.S 275767 2014-12-14 16:28:53Z andrew $");

	.syntax	unified
	.text
	.align	0

/*
 * Overview
 *
 * This file provides copyin and copyout.  Both follow the same shape:
 *
 *   1. Return 0 immediately when the length is <= 0 (signed compare).
 *   2. Optionally hand the copy to the routine whose pointer is stored
 *      at .L_arm_memcpy, when that pointer is non-NULL and the length is
 *      at least the value stored at .L_min_memcpy_size.
 *   3. Otherwise save r10-r11/lr, load the PCB pointer into r10, save
 *      the previous PCB_ONFAULT value in r11, store the address of a
 *      local fault label into PCB_ONFAULT, and "bl" into the copy loop.
 *
 * r3 is used as a tag telling the fault label which extra registers
 * the copy loop currently has pushed on the stack:
 *
 *	r3 == 0		nothing extra pushed
 *	r3 >  0		r4-r7 pushed
 *	r3 <  0		r4-r9 pushed
 *
 * The fault label restores PCB_ONFAULT, pops according to r3, and
 * returns EFAULT in r0.
 *
 * NOTE(review): .L_arm_memcpy, .L_min_memcpy_size, TD_PCB, PC_CURPCB,
 * PCB_ONFAULT and EFAULT are not defined in this file; they are assumed
 * to be supplied by whatever includes/assembles it -- confirm.  The code
 * that transfers control to the PCB_ONFAULT address is likewise not
 * visible here.
 *
 * The computed jumps ("add pc, pc, ...") rely on every instruction
 * being 4 bytes and on a read of pc yielding the address of the current
 * instruction + 8, which is why each one is followed by a single nop.
 * Do not add or remove instructions inside the jump targets.
 */

/*
 * GET_PCB(tmp): leave in tmp an address from which the PCB pointer can
 * be loaded with "ldr tmp, [tmp]".
 */
#ifdef _ARM_ARCH_6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns in r0: 0 on success (or when length <= 0), EFAULT when the
 * fault label is reached.  All loads from the user buffer use the
 * ldrt/ldrbt forms; stores to the kernel buffer use str/strb/strd.
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional fast path: call through the pointer stored at
	 * .L_arm_memcpy with r0 = kernel dst, r1 = user src, r2 = length,
	 * r3 = 2.  A zero result means the copy is done.  Any other
	 * result falls through to .Lnormal with r0-r2 restored.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* No routine installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* Too short for the fast path */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0 <-> r1 via r3 */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return address = the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Does not alter the flags */
	moveq	r0, #0
	RETeq

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = PCB pointer */

	mov	r3, #0x00		/* Nothing extra pushed yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* r11 = previous onfault */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put previous onfault back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

/*
 * Copy loop proper.  On entry r0 = src (user), r1 = dst (kernel),
 * r2 = length > 0, r3 = 0.  Returns to the "bl" in .Lnormal via RET.
 */
.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * ip = 3 - ip = number of 8-byte load/store pairs to skip below.
	 * Zero falls through the nop and runs all three pairs.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = bytes to move in 8-byte chunks (0, 8, 16 or 24).
	 * r5 = 0x18 - r4; shifted left once it is the byte offset that
	 * skips the unneeded chunks, each chunk being four instructions
	 * (16 bytes, padded with a nop).  The flags from the subs survive
	 * to the RETeq after the chunks.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 no longer on the stack */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Undo the sub; r2 = 1-3 either way */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01		/* Second byte, if r2 >= 2 */
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]		/* Third byte, if r2 == 3 */
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * r0 is rounded down to a word boundary and whole words are loaded;
 * each destination word is assembled from two neighbouring source
 * words with shifts.  ip carries the most recently loaded source word
 * from one iteration to the next.  r4-r7 are pushed, so r3 is set to 1.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01
	bic	r0, r0, #0x03
	cmp	ip, #2			/* ip = src & 3 (1, 2 or 3) */
	ldrt	ip, [r0], #0x04		/* Does not alter the flags */
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

	/* Source is 1 byte past a word boundary */
.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back r0 up to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back r0 up to the next unread byte */
	b	.Lcopyin_l4

	/* Source is 2 bytes past a word boundary */
.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back r0 up to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back r0 up to the next unread byte */
	b	.Lcopyin_l4

	/* Source is 3 bytes past a word boundary */
.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back r0 up to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back r0 up to the next unread byte */

	/* Common tail of the misaligned paths: r2 = remaining - 4 */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/* Copy the last 1-3 bytes; r2 = 3 - r2 = byte pairs to skip */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns in r0: 0 on success (or when length <= 0), EFAULT when the
 * fault label is reached.  All stores to the user buffer use the
 * strt/strbt forms; loads from the kernel buffer use ldr/ldrb/ldrd.
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional fast path: call through the pointer stored at
	 * .L_arm_memcpy with r0 = user dst, r1 = kernel src, r2 = length,
	 * r3 = 1.  A zero result means the copy is done.  Any other
	 * result falls through to .Lnormale with r0-r2 restored.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale		/* No routine installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale		/* Too short for the fast path */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0 <-> r1 via r3 */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return address = the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Does not alter the flags */
	moveq	r0, #0
	RETeq

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = PCB pointer */

	mov	r3, #0x00		/* Nothing extra pushed yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* r11 = previous onfault */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put previous onfault back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

/*
 * Copy loop proper.  On entry r0 = src (kernel), r1 = dst (user),
 * r2 = length > 0, r3 = 0.  Returns to the "bl" in .Lnormale via RET.
 */
.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * ip = 3 - ip = number of 8-byte load/store pairs to skip below.
	 * Zero falls through the nop and runs all three pairs.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (it is read with ldrd below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = bytes to move in 8-byte chunks (0, 8, 16 or 24).
	 * r5 = 0x18 - r4; shifted left once it is the byte offset that
	 * skips the unneeded chunks, each chunk being four instructions
	 * (16 bytes, padded with a nop).  The flags from the subs survive
	 * to the RETeq after the chunks.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 no longer on the stack */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Undo the sub; r2 = 1-3 either way */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01		/* Second byte, if r2 >= 2 */
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]		/* Third byte, if r2 == 3 */
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * r0 is rounded down to a word boundary and whole words are loaded;
 * each destination word is assembled from two neighbouring source
 * words with shifts.  ip carries the most recently loaded source word
 * from one iteration to the next.  r4-r7 are pushed, so r3 is set to 1.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01
	bic	r0, r0, #0x03
	cmp	ip, #2			/* ip = src & 3 (1, 2 or 3) */
	ldr	ip, [r0], #0x04		/* Does not alter the flags */
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

	/* Source is 1 byte past a word boundary */
.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back r0 up to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back r0 up to the next unread byte */
	b	.Lcopyout_l4

	/* Source is 2 bytes past a word boundary */
.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back r0 up to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back r0 up to the next unread byte */
	b	.Lcopyout_l4

	/* Source is 3 bytes past a word boundary */
.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back r0 up to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back r0 up to the next unread byte */

	/* Common tail of the misaligned paths: r2 = remaining - 4 */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/* Copy the last 1-3 bytes; r2 = 3 - r2 = byte pairs to skip */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)