/* bcopyinout_xscale.S revision 239268 */
/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/bcopyinout_xscale.S 239268 2012-08-15 03:03:03Z gonzo $");

	.text
	.align	0

/*
 * GET_PCB(tmp)
 *
 * Leaves in `tmp' the ADDRESS of the curpcb slot; every user below follows
 * it with "ldr tmp, [tmp]" to obtain the pcb pointer itself.
 *
 *  - _ARM_ARCH_6: reads CP15 c13/c0/4 and adds PC_CURPCB to it
 *    (presumably that register holds the per-CPU data pointer --
 *    TODO confirm against the pcpu setup code).
 *  - otherwise:   loads the literal __pcpu + PC_CURPCB from .Lcurpcb.
 *
 * Clobbers nothing but `tmp'.
 */
#ifdef _ARM_ARCH_6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(PC_CURPCB)
#else
.Lcurpcb:
	.word _C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

/*
 * copyin
 *
 * r0 = user space address   (source)
 * r1 = kernel space address (destination)
 * r2 = length
 *
 * Copies bytes from user space to kernel space.
 *
 * Returns (in r0):
 *	0	the copy completed, or the length was <= 0 (signed test);
 *	EFAULT	control reached .Lcopyin_fault, whose address is installed
 *		in the pcb's PCB_ONFAULT slot for the duration of the copy
 *		(presumably entered from the data-abort path -- the abort
 *		handler itself is not part of this file).
 *
 * All reads of the user buffer use the "T" load forms (ldrt/ldrbt), i.e.
 * they are performed with user-mode permissions.
 *
 * Register roles inside the copy:
 *	r0/r1	running source/destination pointers
 *	r2	bytes remaining (temporarily biased by the loop pre-subtract)
 *	r3	tells the fault handler what .Lcopyin_guts has pushed:
 *		  0 = nothing, >0 = r4-r7, <0 = r4-r9
 *	r10	pcb pointer
 *	r11	previous PCB_ONFAULT value, restored on every exit
 *	ip	scratch
 *
 * .L_arm_memcpy and .L_min_memcpy_size are not defined in this file; they
 * must be provided by whatever includes/links it.
 *
 * NOTE(review): the length test is signed, so a length with bit 31 set
 * returns 0 without copying anything -- confirm this is intended.
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional accelerated path: if the function pointer stored at
	 * _arm_memcpy is non-NULL and the length is at least
	 * _min_memcpy_size, call it as (dst, src, len, flags).
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	/*
	 * NOTE(review): five words are pushed, so sp is only 4-byte
	 * aligned across the call below -- confirm the ABI in use does
	 * not require 8-byte stack alignment at calls.
	 */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: callee wants dst, src */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* lr = address of the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Original args back; flags kept */
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through and redo the copy the normal way */

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = pcb pointer */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT] /* Put the previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault exit.  sp is wherever .Lcopyin_guts left it, so r3 is
	 * used to unwind whatever the guts pushed before returning EFAULT
	 * to copyin's caller.
	 */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmgtfd	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmltfd	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * The copy proper.  Called with bl; returns with RET and never
	 * saves lr, so nothing in here may clobber it.
	 */
.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump: ip becomes the number of load/store pairs to
	 * SKIP (0-2).  pc reads as "this insn + 8" and each pair is 8
	 * bytes, hence the lsl #3 and the padding nop.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer (strd is used on r1 below) */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrnet	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed jump into the three 8-byte copies below.
	 * r4 = whole 8-byte units left (0/8/16/24 bytes), r5 = bytes of
	 * units to skip; each unit below is exactly 4 instructions (16
	 * bytes of code per 8 bytes of data), hence the lsl #1.  The nops
	 * are padding that keeps every unit 16 bytes -- do not remove.
	 * The flags from the subs survive to the RETeq after the table.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* Stack is clean again */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrget	ip, [r0], #0x04
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04
	/* 1-3 bytes left; r2 doubles as a data register from here on */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrgebt	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrgtbt	ip, [r0]
	strgeb	r2, [r1], #0x01
	strgtb	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read as aligned words and each output word is assembled
 * from two neighbouring input words with shifts; ip always carries the
 * most recently loaded (partly consumed) source word.  The three variants
 * below differ only in the shift amounts for a source offset of 1, 2 or 3.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03		/* Round the source down to a word */
	cmp	ip, #2			/* ip = source offset within word */
	ldrt	ip, [r0], #0x04
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the true source byte */
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the true source byte */
	b	.Lcopyin_l4

.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the true source byte */
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the true source byte */
	b	.Lcopyin_l4

.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the true source byte */
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the true source byte */

	/* Common tail of the unaligned paths: 0-3 bytes left */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* Stack is clean again */
	adds	r2, r2, #0x04		/* Undo the pre-subtract */
	RETeq
	/*
	 * Bytewise copy of the last 1-3 bytes.  r2 becomes the number of
	 * load/store pairs to skip; same pc+8 arithmetic as the alignment
	 * jump at the top of .Lcopyin_guts.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET


/*
 * copyout
 *
 * r0 = kernel space address (source)
 * r1 = user space address   (destination)
 * r2 = length
 *
 * Copies bytes from kernel space to user space.
 *
 * Returns (in r0):
 *	0	the copy completed, or the length was <= 0 (signed test);
 *	EFAULT	control reached .Lcopyout_fault, whose address is installed
 *		in the pcb's PCB_ONFAULT slot for the duration of the copy
 *		(presumably entered from the data-abort path -- the abort
 *		handler itself is not part of this file).
 *
 * All writes to the user buffer use the "T" store forms (strt/strbt), i.e.
 * they are performed with user-mode permissions.
 *
 * Register roles are the same as in copyin: r3 is the stack-state flag
 * for the fault handler (0 = nothing, >0 = r4-r7, <0 = r4-r9), r10 the
 * pcb pointer, r11 the previous PCB_ONFAULT value.
 *
 * NOTE(review): the length test is signed, so a length with bit 31 set
 * returns 0 without copying anything -- confirm this is intended.
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	movle	pc, lr			/* Bail early if length is <= 0 */

	/*
	 * Optional accelerated path: if the function pointer stored at
	 * _arm_memcpy is non-NULL and the length is at least
	 * _min_memcpy_size, call it as (dst, src, len, flags).
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	/*
	 * NOTE(review): five words are pushed, so sp is only 4-byte
	 * aligned across the call below -- confirm the ABI in use does
	 * not require 8-byte stack alignment at calls.
	 */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: callee wants dst, src */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* lr = address of the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* Original args back; flags kept */
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through and redo the copy the normal way */

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = pcb pointer */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT] /* Put the previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault exit.  sp is wherever .Lcopyout_guts left it, so r3 is
	 * used to unwind whatever the guts pushed before returning EFAULT
	 * to copyout's caller.
	 */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmgtfd	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmltfd	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * The copy proper.  Called with bl; returns with RET and never
	 * saves lr, so nothing in here may clobber it.
	 */
.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump: ip becomes the number of load/store pairs to
	 * SKIP (0-2).  pc reads as "this insn + 8" and each pair is 8
	 * bytes, hence the lsl #3 and the padding nop.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (ldrd is used on r0 below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strnet	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * Computed jump into the three 8-byte copies below.
	 * r4 = whole 8-byte units left (0/8/16/24 bytes), r5 = bytes of
	 * units to skip; each unit below is exactly 4 instructions (16
	 * bytes of code per 8 bytes of data), hence the lsl #1.  The nops
	 * are padding that keeps every unit 16 bytes -- do not remove.
	 * The flags from the subs survive to the RETeq after the table.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* Stack is clean again */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04
	strget	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04
	/* 1-3 bytes left; r2 doubles as a data register from here on */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrgeb	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrgtb	ip, [r0]
	strgebt	r2, [r1], #0x01
	strgtbt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read as aligned words and each output word is assembled
 * from two neighbouring input words with shifts; ip always carries the
 * most recently loaded (partly consumed) source word.  The three variants
 * below differ only in the shift amounts for a source offset of 1, 2 or 3.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03		/* Round the source down to a word */
	cmp	ip, #2			/* ip = source offset within word */
	ldr	ip, [r0], #0x04
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the true source byte */
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the true source byte */
	b	.Lcopyout_l4

.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the true source byte */
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the true source byte */
	b	.Lcopyout_l4

.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the true source byte */
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the true source byte */

	/* Common tail of the unaligned paths: 0-3 bytes left */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* Stack is clean again */
	adds	r2, r2, #0x04		/* Undo the pre-subtract */
	RETeq
	/*
	 * Bytewise copy of the last 1-3 bytes.  r2 becomes the number of
	 * load/store pairs to skip; same pc+8 arithmetic as the alignment
	 * jump at the top of .Lcopyout_guts.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET