/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

	.syntax	unified
	.text
	.align	2

/*
 * GET_PCB(tmp) leaves in tmp the ADDRESS of the word that holds the
 * current PCB pointer; every user below follows it with
 * "ldr tmp, [tmp]" to obtain the PCB itself.
 *
 *  - __ARM_ARCH >= 6: reads CP15 c13/c0/4 (presumably the current
 *    thread pointer -- confirm against the pcpu setup code) and adds
 *    the TD_PCB offset.
 *  - otherwise: loads the literal __pcpu + PC_CURPCB.
 *
 * NOTE: .L_arm_memcpy and .L_min_memcpy_size are referenced below but
 * are not defined in this file; they must be supplied by whatever
 * includes or links with it.
 */
#if __ARM_ARCH >= 6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns (in r0):
 *	0	 on success, or when the length is <= 0 (signed test)
 *	EFAULT	 if r0 + r2 wraps around, if r0 + r2 is (unsigned)
 *		 >= VM_MAXUSER_ADDRESS + 1, or if an access faults while
 *		 the onfault handler is installed
 *
 * Fast path: when the pointer stored at the location named by
 * .L_arm_memcpy is non-NULL and the length is at least the value stored
 * at the location named by .L_min_memcpy_size, that routine is called
 * with (r0 = dst, r1 = src, r2 = len, r3 = 2).  A zero return ends the
 * copy successfully; any other return falls through to the generic
 * path with the original arguments restored.
 *
 * Generic path register usage:
 *	r10 = current PCB
 *	r11 = previous PCB_ONFAULT value, restored on every exit
 *	r3  = unwind state consumed by .Lcopyin_fault:
 *		 0  nothing extra on the stack
 *		>0  r4-r7 are stacked
 *		<0  r4-r9 are stacked
 *	ip  = scratch
 *
 * All user-side loads use the "t" forms (ldrt/ldrbt) so that they are
 * performed with user-mode permissions.
 *
 * NOTE(review): the length is tested as a signed value, so a length
 * with bit 31 set also takes the early exit and reports success without
 * copying anything -- confirm this is acceptable to callers.
 * NOTE(review): the fast path pushes five registers before the indirect
 * call, so sp is not 8-byte aligned at the call if it was on entry --
 * confirm the callee tolerates that.
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r0, r2		/* r3 = end of user range */
	movcs	r0, #EFAULT		/* Carry out: range wraps */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End is beyond user space */
	RETc(cs)

	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0			/* Alternate copy routine set? */
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3			/* Big enough to be worth it? */
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: callee wants (dst, src) */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2			/* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Indirect call: lr = insn after ldr pc */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Does not alter the flags */
	moveq	r0, #0
	RETeq				/* Callee returned 0: done */

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = current PCB */

	mov	r3, #0x00		/* Nothing extra stacked yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT] /* Reinstate previous handler */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Installed in PCB_ONFAULT while the copy runs (presumably entered
	 * from the abort handler, which is not part of this file).  Pops
	 * whatever .Lcopyin_guts had stacked, as described by r3, and
	 * returns EFAULT to the caller of copyin.
	 */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump: ip = 3 - bytes.  Each ldrbt/strb pair below is
	 * 8 bytes of code and pc reads as this instruction + 8 (i.e. the
	 * first pair), so this skips ip pairs.  When ip == 0 the add is
	 * not executed and all three pairs run.  Do not insert or remove
	 * instructions between here and the last pair.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed jump into the three 8-byte copy groups below.
	 * r4 = bytes to move in whole 8-byte units (0, 8, 16 or 24),
	 * r5 = 0x18 - r4.  Each group is exactly four instructions
	 * (16 bytes of code; the nop is padding), so r5 << 1 is the code
	 * offset to skip.  The flags set by the subs (Z when nothing is
	 * left afterwards) are still live at the RETeq after the groups.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 no longer stacked */

	/* Between 1 and 7 bytes remain; both buffers are word aligned */
.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04		/* >= 4 left: move one word */
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* < 4 left: undo the sub */
	ldrbt	ip, [r0], #0x01		/* 1..3 bytes left: 1st byte */
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01		/* 2nd byte if >= 2 left */
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]		/* 3rd byte if 3 left */
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * On entry ip = r0 & 3 (1, 2 or 3).  r0 is rounded down to a word
 * boundary and the first word is preloaded into ip; the three variants
 * below then build each destination word from the tail of the previous
 * source word and the head of the next one.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2			/* ldrt below leaves flags intact */
	ldrt	ip, [r0], #0x04
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back up to first unconsumed byte */
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back up to first unconsumed byte */
	b	.Lcopyin_l4

.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back up to first unconsumed byte */
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back up to first unconsumed byte */
	b	.Lcopyin_l4

.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back up to first unconsumed byte */
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back up to first unconsumed byte */

.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 no longer stacked */
	adds	r2, r2, #0x04		/* Adjust for extra sub */
	RETeq
	/*
	 * Copy the final 1..3 bytes.  Same computed-jump scheme as the
	 * alignment code above: skip (3 - r2) ldrbt/strb pairs of 8 bytes
	 * of code each; with r2 == 3 the add is not executed.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns (in r0):
 *	0	 on success, or when the length is <= 0 (signed test)
 *	EFAULT	 if r1 + r2 wraps around, if r1 + r2 is (unsigned)
 *		 >= VM_MAXUSER_ADDRESS + 1, or if an access faults while
 *		 the onfault handler is installed
 *
 * Fast path: as in copyin, except that the routine reached through
 * .L_arm_memcpy is called with r3 = 1.  Note that r0 and r1 are
 * swapped before that call.
 *
 * Generic path register usage:
 *	r10 = current PCB
 *	r11 = previous PCB_ONFAULT value, restored on every exit
 *	r3  = unwind state consumed by .Lcopyout_fault:
 *		 0  nothing extra on the stack
 *		>0  r4-r7 are stacked
 *		<0  r4-r9 are stacked
 *	ip  = scratch
 *
 * All user-side stores use the "t" forms (strt/strbt) so that they are
 * performed with user-mode permissions.
 *
 * NOTE(review): the length is tested as a signed value, so a length
 * with bit 31 set also takes the early exit and reports success without
 * copying anything -- confirm this is acceptable to callers.
 * NOTE(review): the fast path pushes five registers before the indirect
 * call, so sp is not 8-byte aligned at the call if it was on entry --
 * confirm the callee tolerates that.
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r1, r2		/* r3 = end of user range */
	movcs	r0, #EFAULT		/* Carry out: range wraps */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End is beyond user space */
	RETc(cs)

	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0			/* Alternate copy routine set? */
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3			/* Big enough to be worth it? */
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0 and r1 for the callee */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1			/* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Indirect call: lr = insn after ldr pc */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Does not alter the flags */
	moveq	r0, #0
	RETeq				/* Callee returned 0: done */

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = current PCB */

	mov	r3, #0x00		/* Nothing extra stacked yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT] /* Reinstate previous handler */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Installed in PCB_ONFAULT while the copy runs (presumably entered
	 * from the abort handler, which is not part of this file).  Pops
	 * whatever .Lcopyout_guts had stacked, as described by r3, and
	 * returns EFAULT to the caller of copyout.
	 */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump: ip = 3 - bytes.  Each ldrb/strbt pair below is
	 * 8 bytes of code and pc reads as this instruction + 8 (i.e. the
	 * first pair), so this skips ip pairs.  When ip == 0 the add is
	 * not executed and all three pairs run.  Do not insert or remove
	 * instructions between here and the last pair.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed jump into the three 8-byte copy groups below.
	 * r4 = bytes to move in whole 8-byte units (0, 8, 16 or 24),
	 * r5 = 0x18 - r4.  Each group is exactly four instructions
	 * (16 bytes of code; the nop is padding), so r5 << 1 is the code
	 * offset to skip.  The flags set by the subs (Z when nothing is
	 * left afterwards) are still live at the RETeq after the groups.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 no longer stacked */

	/* Between 1 and 7 bytes remain; both buffers are word aligned */
.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04		/* >= 4 left: move one word */
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* < 4 left: undo the sub */
	ldrb	ip, [r0], #0x01		/* 1..3 bytes left: 1st byte */
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01		/* 2nd byte if >= 2 left */
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]		/* 3rd byte if 3 left */
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * On entry ip = r0 & 3 (1, 2 or 3).  r0 is rounded down to a word
 * boundary and the first word is preloaded into ip; the three variants
 * below then build each destination word from the tail of the previous
 * source word and the head of the next one.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2			/* ldr below leaves flags intact */
	ldr	ip, [r0], #0x04
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back up to first unconsumed byte */
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back up to first unconsumed byte */
	b	.Lcopyout_l4

.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back up to first unconsumed byte */
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back up to first unconsumed byte */
	b	.Lcopyout_l4

.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back up to first unconsumed byte */
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back up to first unconsumed byte */

.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 no longer stacked */
	adds	r2, r2, #0x04		/* Adjust for extra sub */
	RETeq
	/*
	 * Copy the final 1..3 bytes.  Same computed-jump scheme as the
	 * alignment code above: skip (3 - r2) ldrb/strbt pairs of 8 bytes
	 * of code each; with r2 == 3 the add is not executed.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)