/* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/bcopyinout_xscale.S 284264 2015-06-11 13:54:18Z andrew $");

#include <machine/acle-compat.h>

/*
 * File overview
 *
 * 32-bit ARM, GNU as unified syntax, run through the C preprocessor.
 * Provides copyin() and copyout().
 *
 * Symbols used here but not defined in this file: TD_PCB, PC_CURPCB,
 * PCB_ONFAULT, EFAULT, .L_arm_memcpy and .L_min_memcpy_size.
 * NOTE(review): presumably they come from generated offset headers and
 * from the file that includes this one -- confirm against the build.
 *
 * Several code sequences below are computed-branch tables
 * ("add pc, pc, rX, lsl #n" followed by a nop).  They rely on the exact
 * number of instructions in each table entry; never insert, remove or
 * reorder instructions inside them.
 */

	.syntax	unified
	.text
	.align	2

/*
 * GET_PCB(tmp): leave in tmp the address of the word that holds the
 * current PCB pointer.  Callers follow it with "ldr tmp, [tmp]".
 */
#if __ARM_ARCH >= 6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns in r0:
 *	0	on success, and also when length <= 0 (the length is tested
 *		as a signed value; the later "blt" tests on r2 rely on that)
 *	EFAULT	when the fault handler .Lcopyin_fault was entered
 *
 * Register roles while .Lcopyin_guts runs:
 *	r3	tells .Lcopyin_fault what the guts have pushed on the stack:
 *		0 = nothing, > 0 = r4-r7, < 0 = r4-r9
 *	r10	PCB pointer
 *	r11	PCB_ONFAULT value found on entry, restored on exit
 *	ip	scratch
 *
 * Every load from the user buffer uses the ldrt/ldrbt forms (user-mode
 * permission checks, per the ARM architecture manual).
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional hook: if *.L_arm_memcpy is non-NULL and the length is at
	 * least *.L_min_memcpy_size, call it as (dst, src, len, flag).
	 * A zero return means it did the copy; otherwise the arguments are
	 * restored and the normal path below runs.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: hook wants dst first */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return address for indirect call */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Does not alter the flags */
	moveq	r0, #0
	RETeq

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]

	mov	r3, #0x00		/* Guts have pushed nothing yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/* Fault path: undo whatever the guts pushed, then return EFAULT */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed branch: ip = 3 - (bytes needed); skip that many
	 * two-instruction ldrbt/strb pairs.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed branch into the 8-byte copies below.  r4 = number of
	 * whole 8-byte units left (as a byte count: 0, 8, 16 or 24) and
	 * each table entry is exactly four instructions; the nops inside
	 * the entries are padding.  The flags set by the subs are still
	 * live at the RETeq that follows the table.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* Nothing left on the stack */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04
	/* 1 to 3 trailing bytes; r2 is reused as a data register below */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read as aligned words and the destination words are
 * assembled by shifting; ip always carries the most recently loaded
 * source word.  The three variants differ only in the shift amounts
 * (source offset 1, 2 or 3 within its word).
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2
	ldrt	ip, [r0], #0x04
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03
	b	.Lcopyin_l4

.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02
	b	.Lcopyin_l4

.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01

	/* Common tail of the misaligned paths: r0 is byte-exact again */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* Nothing left on the stack */
	adds	r2, r2, #0x04
	RETeq
	/*
	 * Copy the last 1 to 3 bytes.  Computed branch: skip (3 - r2)
	 * two-instruction load/store pairs.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns in r0:
 *	0	on success, and also when length <= 0 (the length is tested
 *		as a signed value; the later "blt" tests on r2 rely on that)
 *	EFAULT	when the fault handler .Lcopyout_fault was entered
 *
 * Register roles while .Lcopyout_guts runs:
 *	r3	tells .Lcopyout_fault what the guts have pushed on the stack:
 *		0 = nothing, > 0 = r4-r7, < 0 = r4-r9
 *	r10	PCB pointer
 *	r11	PCB_ONFAULT value found on entry, restored on exit
 *	ip	scratch
 *
 * Every store to the user buffer uses the strt/strbt forms (user-mode
 * permission checks, per the ARM architecture manual).
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional hook: if *.L_arm_memcpy is non-NULL and the length is at
	 * least *.L_min_memcpy_size, call it as (dst, src, len, flag).
	 * A zero return means it did the copy; otherwise the arguments are
	 * restored and the normal path below runs.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: hook wants dst first */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return address for indirect call */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Does not alter the flags */
	moveq	r0, #0
	RETeq

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]

	mov	r3, #0x00		/* Guts have pushed nothing yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/* Fault path: undo whatever the guts pushed, then return EFAULT */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed branch: ip = 3 - (bytes needed); skip that many
	 * two-instruction ldrb/strbt pairs.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (it is read with ldrd below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed branch into the 8-byte copies below.  r4 = number of
	 * whole 8-byte units left (as a byte count: 0, 8, 16 or 24) and
	 * each table entry is exactly four instructions; the nops inside
	 * the entries are padding.  The flags set by the subs are still
	 * live at the RETeq that follows the table.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* Nothing left on the stack */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04
	/* 1 to 3 trailing bytes; r2 is reused as a data register below */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read as aligned words and the destination words are
 * assembled by shifting; ip always carries the most recently loaded
 * source word.  The three variants differ only in the shift amounts
 * (source offset 1, 2 or 3 within its word).
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2
	ldr	ip, [r0], #0x04
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03
	b	.Lcopyout_l4

.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02
	b	.Lcopyout_l4

.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01

	/* Common tail of the misaligned paths: r0 is byte-exact again */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* Nothing left on the stack */
	adds	r2, r2, #0x04
	RETeq
	/*
	 * Copy the last 1 to 3 bytes.  Computed branch: skip (3 - r2)
	 * two-instruction load/store pairs.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)