/*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/

/*-
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
36 */ 37 38 39#include "assym.s" 40 41#include <machine/asm.h> 42#include <sys/errno.h> 43 44.L_arm_memcpy: 45 .word _C_LABEL(_arm_memcpy) 46.L_min_memcpy_size: 47 .word _C_LABEL(_min_memcpy_size) 48 49__FBSDID("$FreeBSD$"); 50#ifdef _ARM_ARCH_5E 51#include <arm/arm/bcopyinout_xscale.S> 52#else 53 54 .text 55 .align 0 56 57#ifdef _ARM_ARCH_6 58#define GET_PCB(tmp) \ 59 mrc p15, 0, tmp, c13, c0, 4; \ 60 add tmp, tmp, #(PC_CURPCB) 61#else 62.Lcurpcb: 63 .word _C_LABEL(__pcpu) + PC_CURPCB 64 65#define GET_PCB(tmp) \ 66 ldr tmp, .Lcurpcb 67#endif 68 69 70#define SAVE_REGS stmfd sp!, {r4-r11} 71#define RESTORE_REGS ldmfd sp!, {r4-r11} 72 73#if defined(_ARM_ARCH_5E) 74#define HELLOCPP # 75#define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ] 76#else 77#define PREFETCH(rx,o) 78#endif 79 80/* 81 * r0 = user space address 82 * r1 = kernel space address 83 * r2 = length 84 * 85 * Copies bytes from user space to kernel space 86 * 87 * We save/restore r4-r11: 88 * r4-r11 are scratch 89 */ 90ENTRY(copyin) 91 /* Quick exit if length is zero */ 92 teq r2, #0 93 moveq r0, #0 94 RETeq 95 96 ldr r3, .L_arm_memcpy 97 ldr r3, [r3] 98 cmp r3, #0 99 beq .Lnormal 100 ldr r3, .L_min_memcpy_size 101 ldr r3, [r3] 102 cmp r2, r3 103 blt .Lnormal 104 stmfd sp!, {r0-r2, r4, lr} 105 mov r3, r0 106 mov r0, r1 107 mov r1, r3 108 mov r3, #2 /* SRC_IS_USER */ 109 ldr r4, .L_arm_memcpy 110 mov lr, pc 111 ldr pc, [r4] 112 cmp r0, #0 113 ldmfd sp!, {r0-r2, r4, lr} 114 moveq r0, #0 115 RETeq 116 117.Lnormal: 118 SAVE_REGS 119 GET_PCB(r4) 120 ldr r4, [r4] 121 122 123 ldr r5, [r4, #PCB_ONFAULT] 124 adr r3, .Lcopyfault 125 str r3, [r4, #PCB_ONFAULT] 126 127 PREFETCH(r0, 0) 128 PREFETCH(r1, 0) 129 130 /* 131 * If not too many bytes, take the slow path. 132 */ 133 cmp r2, #0x08 134 blt .Licleanup 135 136 /* 137 * Align destination to word boundary. 
138 */ 139 and r6, r1, #0x3 140 ldr pc, [pc, r6, lsl #2] 141 b .Lialend 142 .word .Lialend 143 .word .Lial3 144 .word .Lial2 145 .word .Lial1 146.Lial3: ldrbt r6, [r0], #1 147 sub r2, r2, #1 148 strb r6, [r1], #1 149.Lial2: ldrbt r7, [r0], #1 150 sub r2, r2, #1 151 strb r7, [r1], #1 152.Lial1: ldrbt r6, [r0], #1 153 sub r2, r2, #1 154 strb r6, [r1], #1 155.Lialend: 156 157 /* 158 * If few bytes left, finish slow. 159 */ 160 cmp r2, #0x08 161 blt .Licleanup 162 163 /* 164 * If source is not aligned, finish slow. 165 */ 166 ands r3, r0, #0x03 167 bne .Licleanup 168 169 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */ 170 blt .Licleanup8 171 172 /* 173 * Align destination to cacheline boundary. 174 * If source and destination are nicely aligned, this can be a big 175 * win. If not, it's still cheaper to copy in groups of 32 even if 176 * we don't get the nice cacheline alignment. 177 */ 178 and r6, r1, #0x1f 179 ldr pc, [pc, r6] 180 b .Licaligned 181 .word .Licaligned 182 .word .Lical28 183 .word .Lical24 184 .word .Lical20 185 .word .Lical16 186 .word .Lical12 187 .word .Lical8 188 .word .Lical4 189.Lical28:ldrt r6, [r0], #4 190 sub r2, r2, #4 191 str r6, [r1], #4 192.Lical24:ldrt r7, [r0], #4 193 sub r2, r2, #4 194 str r7, [r1], #4 195.Lical20:ldrt r6, [r0], #4 196 sub r2, r2, #4 197 str r6, [r1], #4 198.Lical16:ldrt r7, [r0], #4 199 sub r2, r2, #4 200 str r7, [r1], #4 201.Lical12:ldrt r6, [r0], #4 202 sub r2, r2, #4 203 str r6, [r1], #4 204.Lical8:ldrt r7, [r0], #4 205 sub r2, r2, #4 206 str r7, [r1], #4 207.Lical4:ldrt r6, [r0], #4 208 sub r2, r2, #4 209 str r6, [r1], #4 210 211 /* 212 * We start with > 0x40 bytes to copy (>= 0x60 got us into this 213 * part of the code, and we may have knocked that down by as much 214 * as 0x1c getting aligned). 
215 * 216 * This loop basically works out to: 217 * do { 218 * prefetch-next-cacheline(s) 219 * bytes -= 0x20; 220 * copy cacheline 221 * } while (bytes >= 0x40); 222 * bytes -= 0x20; 223 * copy cacheline 224 */ 225.Licaligned: 226 PREFETCH(r0, 32) 227 PREFETCH(r1, 32) 228 229 sub r2, r2, #0x20 230 231 /* Copy a cacheline */ 232 ldrt r10, [r0], #4 233 ldrt r11, [r0], #4 234 ldrt r6, [r0], #4 235 ldrt r7, [r0], #4 236 ldrt r8, [r0], #4 237 ldrt r9, [r0], #4 238 stmia r1!, {r10-r11} 239 ldrt r10, [r0], #4 240 ldrt r11, [r0], #4 241 stmia r1!, {r6-r11} 242 243 cmp r2, #0x40 244 bge .Licaligned 245 246 sub r2, r2, #0x20 247 248 /* Copy a cacheline */ 249 ldrt r10, [r0], #4 250 ldrt r11, [r0], #4 251 ldrt r6, [r0], #4 252 ldrt r7, [r0], #4 253 ldrt r8, [r0], #4 254 ldrt r9, [r0], #4 255 stmia r1!, {r10-r11} 256 ldrt r10, [r0], #4 257 ldrt r11, [r0], #4 258 stmia r1!, {r6-r11} 259 260 cmp r2, #0x08 261 blt .Liprecleanup 262 263.Licleanup8: 264 ldrt r8, [r0], #4 265 ldrt r9, [r0], #4 266 sub r2, r2, #8 267 stmia r1!, {r8, r9} 268 cmp r2, #8 269 bge .Licleanup8 270 271.Liprecleanup: 272 /* 273 * If we're done, bail. 
274 */ 275 cmp r2, #0 276 beq .Lout 277 278.Licleanup: 279 and r6, r2, #0x3 280 ldr pc, [pc, r6, lsl #2] 281 b .Licend 282 .word .Lic4 283 .word .Lic1 284 .word .Lic2 285 .word .Lic3 286.Lic4: ldrbt r6, [r0], #1 287 sub r2, r2, #1 288 strb r6, [r1], #1 289.Lic3: ldrbt r7, [r0], #1 290 sub r2, r2, #1 291 strb r7, [r1], #1 292.Lic2: ldrbt r6, [r0], #1 293 sub r2, r2, #1 294 strb r6, [r1], #1 295.Lic1: ldrbt r7, [r0], #1 296 subs r2, r2, #1 297 strb r7, [r1], #1 298.Licend: 299 bne .Licleanup 300 301.Liout: 302 mov r0, #0 303 304 str r5, [r4, #PCB_ONFAULT] 305 RESTORE_REGS 306 307 RET 308 309.Lcopyfault: 310 ldr r0, =EFAULT 311 str r5, [r4, #PCB_ONFAULT] 312 RESTORE_REGS 313 314 RET 315END(copyin) 316 317/* 318 * r0 = kernel space address 319 * r1 = user space address 320 * r2 = length 321 * 322 * Copies bytes from kernel space to user space 323 * 324 * We save/restore r4-r11: 325 * r4-r11 are scratch 326 */ 327 328ENTRY(copyout) 329 /* Quick exit if length is zero */ 330 teq r2, #0 331 moveq r0, #0 332 RETeq 333 334 ldr r3, .L_arm_memcpy 335 ldr r3, [r3] 336 cmp r3, #0 337 beq .Lnormale 338 ldr r3, .L_min_memcpy_size 339 ldr r3, [r3] 340 cmp r2, r3 341 blt .Lnormale 342 stmfd sp!, {r0-r2, r4, lr} 343 mov r3, r0 344 mov r0, r1 345 mov r1, r3 346 mov r3, #1 /* DST_IS_USER */ 347 ldr r4, .L_arm_memcpy 348 mov lr, pc 349 ldr pc, [r4] 350 cmp r0, #0 351 ldmfd sp!, {r0-r2, r4, lr} 352 moveq r0, #0 353 RETeq 354 355.Lnormale: 356 SAVE_REGS 357 GET_PCB(r4) 358 ldr r4, [r4] 359 360 ldr r5, [r4, #PCB_ONFAULT] 361 adr r3, .Lcopyfault 362 str r3, [r4, #PCB_ONFAULT] 363 364 PREFETCH(r0, 0) 365 PREFETCH(r1, 0) 366 367 /* 368 * If not too many bytes, take the slow path. 369 */ 370 cmp r2, #0x08 371 blt .Lcleanup 372 373 /* 374 * Align destination to word boundary. 
375 */ 376 and r6, r1, #0x3 377 ldr pc, [pc, r6, lsl #2] 378 b .Lalend 379 .word .Lalend 380 .word .Lal3 381 .word .Lal2 382 .word .Lal1 383.Lal3: ldrb r6, [r0], #1 384 sub r2, r2, #1 385 strbt r6, [r1], #1 386.Lal2: ldrb r7, [r0], #1 387 sub r2, r2, #1 388 strbt r7, [r1], #1 389.Lal1: ldrb r6, [r0], #1 390 sub r2, r2, #1 391 strbt r6, [r1], #1 392.Lalend: 393 394 /* 395 * If few bytes left, finish slow. 396 */ 397 cmp r2, #0x08 398 blt .Lcleanup 399 400 /* 401 * If source is not aligned, finish slow. 402 */ 403 ands r3, r0, #0x03 404 bne .Lcleanup 405 406 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */ 407 blt .Lcleanup8 408 409 /* 410 * Align source & destination to cacheline boundary. 411 */ 412 and r6, r1, #0x1f 413 ldr pc, [pc, r6] 414 b .Lcaligned 415 .word .Lcaligned 416 .word .Lcal28 417 .word .Lcal24 418 .word .Lcal20 419 .word .Lcal16 420 .word .Lcal12 421 .word .Lcal8 422 .word .Lcal4 423.Lcal28:ldr r6, [r0], #4 424 sub r2, r2, #4 425 strt r6, [r1], #4 426.Lcal24:ldr r7, [r0], #4 427 sub r2, r2, #4 428 strt r7, [r1], #4 429.Lcal20:ldr r6, [r0], #4 430 sub r2, r2, #4 431 strt r6, [r1], #4 432.Lcal16:ldr r7, [r0], #4 433 sub r2, r2, #4 434 strt r7, [r1], #4 435.Lcal12:ldr r6, [r0], #4 436 sub r2, r2, #4 437 strt r6, [r1], #4 438.Lcal8: ldr r7, [r0], #4 439 sub r2, r2, #4 440 strt r7, [r1], #4 441.Lcal4: ldr r6, [r0], #4 442 sub r2, r2, #4 443 strt r6, [r1], #4 444 445 /* 446 * We start with > 0x40 bytes to copy (>= 0x60 got us into this 447 * part of the code, and we may have knocked that down by as much 448 * as 0x1c getting aligned). 
449 * 450 * This loop basically works out to: 451 * do { 452 * prefetch-next-cacheline(s) 453 * bytes -= 0x20; 454 * copy cacheline 455 * } while (bytes >= 0x40); 456 * bytes -= 0x20; 457 * copy cacheline 458 */ 459.Lcaligned: 460 PREFETCH(r0, 32) 461 PREFETCH(r1, 32) 462 463 sub r2, r2, #0x20 464 465 /* Copy a cacheline */ 466 ldmia r0!, {r6-r11} 467 strt r6, [r1], #4 468 strt r7, [r1], #4 469 ldmia r0!, {r6-r7} 470 strt r8, [r1], #4 471 strt r9, [r1], #4 472 strt r10, [r1], #4 473 strt r11, [r1], #4 474 strt r6, [r1], #4 475 strt r7, [r1], #4 476 477 cmp r2, #0x40 478 bge .Lcaligned 479 480 sub r2, r2, #0x20 481 482 /* Copy a cacheline */ 483 ldmia r0!, {r6-r11} 484 strt r6, [r1], #4 485 strt r7, [r1], #4 486 ldmia r0!, {r6-r7} 487 strt r8, [r1], #4 488 strt r9, [r1], #4 489 strt r10, [r1], #4 490 strt r11, [r1], #4 491 strt r6, [r1], #4 492 strt r7, [r1], #4 493 494 cmp r2, #0x08 495 blt .Lprecleanup 496 497.Lcleanup8: 498 ldmia r0!, {r8-r9} 499 sub r2, r2, #8 500 strt r8, [r1], #4 501 strt r9, [r1], #4 502 cmp r2, #8 503 bge .Lcleanup8 504 505.Lprecleanup: 506 /* 507 * If we're done, bail. 508 */ 509 cmp r2, #0 510 beq .Lout 511 512.Lcleanup: 513 and r6, r2, #0x3 514 ldr pc, [pc, r6, lsl #2] 515 b .Lcend 516 .word .Lc4 517 .word .Lc1 518 .word .Lc2 519 .word .Lc3 520.Lc4: ldrb r6, [r0], #1 521 sub r2, r2, #1 522 strbt r6, [r1], #1 523.Lc3: ldrb r7, [r0], #1 524 sub r2, r2, #1 525 strbt r7, [r1], #1 526.Lc2: ldrb r6, [r0], #1 527 sub r2, r2, #1 528 strbt r6, [r1], #1 529.Lc1: ldrb r7, [r0], #1 530 subs r2, r2, #1 531 strbt r7, [r1], #1 532.Lcend: 533 bne .Lcleanup 534 535.Lout: 536 mov r0, #0 537 538 str r5, [r4, #PCB_ONFAULT] 539 RESTORE_REGS 540 541 RET 542END(copyout) 543#endif 544 545/* 546 * int badaddr_read_1(const uint8_t *src, uint8_t *dest) 547 * 548 * Copies a single 8-bit value from src to dest, returning 0 on success, 549 * else EFAULT if a page fault occurred. 
550 */ 551ENTRY(badaddr_read_1) 552 GET_PCB(r2) 553 ldr r2, [r2] 554 555 ldr ip, [r2, #PCB_ONFAULT] 556 adr r3, 1f 557 str r3, [r2, #PCB_ONFAULT] 558 nop 559 nop 560 nop 561 ldrb r3, [r0] 562 nop 563 nop 564 nop 565 strb r3, [r1] 566 mov r0, #0 /* No fault */ 5671: str ip, [r2, #PCB_ONFAULT] 568 RET 569END(badaddr_read_1) 570 571/* 572 * int badaddr_read_2(const uint16_t *src, uint16_t *dest) 573 * 574 * Copies a single 16-bit value from src to dest, returning 0 on success, 575 * else EFAULT if a page fault occurred. 576 */ 577ENTRY(badaddr_read_2) 578 GET_PCB(r2) 579 ldr r2, [r2] 580 581 ldr ip, [r2, #PCB_ONFAULT] 582 adr r3, 1f 583 str r3, [r2, #PCB_ONFAULT] 584 nop 585 nop 586 nop 587 ldrh r3, [r0] 588 nop 589 nop 590 nop 591 strh r3, [r1] 592 mov r0, #0 /* No fault */ 5931: str ip, [r2, #PCB_ONFAULT] 594 RET 595END(badaddr_read_2) 596 597/* 598 * int badaddr_read_4(const uint32_t *src, uint32_t *dest) 599 * 600 * Copies a single 32-bit value from src to dest, returning 0 on success, 601 * else EFAULT if a page fault occurred. 602 */ 603ENTRY(badaddr_read_4) 604 GET_PCB(r2) 605 ldr r2, [r2] 606 607 ldr ip, [r2, #PCB_ONFAULT] 608 adr r3, 1f 609 str r3, [r2, #PCB_ONFAULT] 610 nop 611 nop 612 nop 613 ldr r3, [r0] 614 nop 615 nop 616 nop 617 str r3, [r1] 618 mov r0, #0 /* No fault */ 6191: str ip, [r2, #PCB_ONFAULT] 620 RET 621END(badaddr_read_4) 622 623