/* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */

/*-
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
36 */ 37 38 39#include "assym.s" 40 41#include <machine/acle-compat.h> 42#include <machine/asm.h> 43#include <sys/errno.h> 44 45.L_arm_memcpy: 46 .word _C_LABEL(_arm_memcpy) 47.L_min_memcpy_size: 48 .word _C_LABEL(_min_memcpy_size) 49 50__FBSDID("$FreeBSD: releng/10.3/sys/arm/arm/bcopyinout.S 294681 2016-01-24 19:58:58Z ian $"); 51#ifdef _ARM_ARCH_5E 52#include <arm/arm/bcopyinout_xscale.S> 53#else 54 55 .text 56 .align 2 57 58#if __ARM_ARCH >= 6 59#define GET_PCB(tmp) \ 60 mrc p15, 0, tmp, c13, c0, 4; \ 61 add tmp, tmp, #(TD_PCB) 62#else 63.Lcurpcb: 64 .word _C_LABEL(__pcpu) + PC_CURPCB 65 66#define GET_PCB(tmp) \ 67 ldr tmp, .Lcurpcb 68#endif 69 70 71#define SAVE_REGS stmfd sp!, {r4-r11} 72#define RESTORE_REGS ldmfd sp!, {r4-r11} 73 74#if defined(_ARM_ARCH_5E) 75#define HELLOCPP # 76#define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ] 77#else 78#define PREFETCH(rx,o) 79#endif 80 81/* 82 * r0 = user space address 83 * r1 = kernel space address 84 * r2 = length 85 * 86 * Copies bytes from user space to kernel space 87 * 88 * We save/restore r4-r11: 89 * r4-r11 are scratch 90 */ 91ENTRY(copyin) 92 /* Quick exit if length is zero */ 93 teq r2, #0 94 moveq r0, #0 95 RETeq 96 97 ldr r3, .L_arm_memcpy 98 ldr r3, [r3] 99 cmp r3, #0 100 beq .Lnormal 101 ldr r3, .L_min_memcpy_size 102 ldr r3, [r3] 103 cmp r2, r3 104 blt .Lnormal 105 stmfd sp!, {r0-r2, r4, lr} 106 mov r3, r0 107 mov r0, r1 108 mov r1, r3 109 mov r3, #2 /* SRC_IS_USER */ 110 ldr r4, .L_arm_memcpy 111 mov lr, pc 112 ldr pc, [r4] 113 cmp r0, #0 114 ldmfd sp!, {r0-r2, r4, lr} 115 moveq r0, #0 116 RETeq 117 118.Lnormal: 119 SAVE_REGS 120 GET_PCB(r4) 121 ldr r4, [r4] 122 123 124 ldr r5, [r4, #PCB_ONFAULT] 125 adr r3, .Lcopyfault 126 str r3, [r4, #PCB_ONFAULT] 127 128 PREFETCH(r0, 0) 129 PREFETCH(r1, 0) 130 131 /* 132 * If not too many bytes, take the slow path. 133 */ 134 cmp r2, #0x08 135 blt .Licleanup 136 137 /* 138 * Align destination to word boundary. 
139 */ 140 and r6, r1, #0x3 141 ldr pc, [pc, r6, lsl #2] 142 b .Lialend 143 .word .Lialend 144 .word .Lial3 145 .word .Lial2 146 .word .Lial1 147.Lial3: ldrbt r6, [r0], #1 148 sub r2, r2, #1 149 strb r6, [r1], #1 150.Lial2: ldrbt r7, [r0], #1 151 sub r2, r2, #1 152 strb r7, [r1], #1 153.Lial1: ldrbt r6, [r0], #1 154 sub r2, r2, #1 155 strb r6, [r1], #1 156.Lialend: 157 158 /* 159 * If few bytes left, finish slow. 160 */ 161 cmp r2, #0x08 162 blt .Licleanup 163 164 /* 165 * If source is not aligned, finish slow. 166 */ 167 ands r3, r0, #0x03 168 bne .Licleanup 169 170 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */ 171 blt .Licleanup8 172 173 /* 174 * Align destination to cacheline boundary. 175 * If source and destination are nicely aligned, this can be a big 176 * win. If not, it's still cheaper to copy in groups of 32 even if 177 * we don't get the nice cacheline alignment. 178 */ 179 and r6, r1, #0x1f 180 ldr pc, [pc, r6] 181 b .Licaligned 182 .word .Licaligned 183 .word .Lical28 184 .word .Lical24 185 .word .Lical20 186 .word .Lical16 187 .word .Lical12 188 .word .Lical8 189 .word .Lical4 190.Lical28:ldrt r6, [r0], #4 191 sub r2, r2, #4 192 str r6, [r1], #4 193.Lical24:ldrt r7, [r0], #4 194 sub r2, r2, #4 195 str r7, [r1], #4 196.Lical20:ldrt r6, [r0], #4 197 sub r2, r2, #4 198 str r6, [r1], #4 199.Lical16:ldrt r7, [r0], #4 200 sub r2, r2, #4 201 str r7, [r1], #4 202.Lical12:ldrt r6, [r0], #4 203 sub r2, r2, #4 204 str r6, [r1], #4 205.Lical8:ldrt r7, [r0], #4 206 sub r2, r2, #4 207 str r7, [r1], #4 208.Lical4:ldrt r6, [r0], #4 209 sub r2, r2, #4 210 str r6, [r1], #4 211 212 /* 213 * We start with > 0x40 bytes to copy (>= 0x60 got us into this 214 * part of the code, and we may have knocked that down by as much 215 * as 0x1c getting aligned). 
216 * 217 * This loop basically works out to: 218 * do { 219 * prefetch-next-cacheline(s) 220 * bytes -= 0x20; 221 * copy cacheline 222 * } while (bytes >= 0x40); 223 * bytes -= 0x20; 224 * copy cacheline 225 */ 226.Licaligned: 227 PREFETCH(r0, 32) 228 PREFETCH(r1, 32) 229 230 sub r2, r2, #0x20 231 232 /* Copy a cacheline */ 233 ldrt r10, [r0], #4 234 ldrt r11, [r0], #4 235 ldrt r6, [r0], #4 236 ldrt r7, [r0], #4 237 ldrt r8, [r0], #4 238 ldrt r9, [r0], #4 239 stmia r1!, {r10-r11} 240 ldrt r10, [r0], #4 241 ldrt r11, [r0], #4 242 stmia r1!, {r6-r11} 243 244 cmp r2, #0x40 245 bge .Licaligned 246 247 sub r2, r2, #0x20 248 249 /* Copy a cacheline */ 250 ldrt r10, [r0], #4 251 ldrt r11, [r0], #4 252 ldrt r6, [r0], #4 253 ldrt r7, [r0], #4 254 ldrt r8, [r0], #4 255 ldrt r9, [r0], #4 256 stmia r1!, {r10-r11} 257 ldrt r10, [r0], #4 258 ldrt r11, [r0], #4 259 stmia r1!, {r6-r11} 260 261 cmp r2, #0x08 262 blt .Liprecleanup 263 264.Licleanup8: 265 ldrt r8, [r0], #4 266 ldrt r9, [r0], #4 267 sub r2, r2, #8 268 stmia r1!, {r8, r9} 269 cmp r2, #8 270 bge .Licleanup8 271 272.Liprecleanup: 273 /* 274 * If we're done, bail. 
275 */ 276 cmp r2, #0 277 beq .Lout 278 279.Licleanup: 280 and r6, r2, #0x3 281 ldr pc, [pc, r6, lsl #2] 282 b .Licend 283 .word .Lic4 284 .word .Lic1 285 .word .Lic2 286 .word .Lic3 287.Lic4: ldrbt r6, [r0], #1 288 sub r2, r2, #1 289 strb r6, [r1], #1 290.Lic3: ldrbt r7, [r0], #1 291 sub r2, r2, #1 292 strb r7, [r1], #1 293.Lic2: ldrbt r6, [r0], #1 294 sub r2, r2, #1 295 strb r6, [r1], #1 296.Lic1: ldrbt r7, [r0], #1 297 subs r2, r2, #1 298 strb r7, [r1], #1 299.Licend: 300 bne .Licleanup 301 302.Liout: 303 mov r0, #0 304 305 str r5, [r4, #PCB_ONFAULT] 306 RESTORE_REGS 307 308 RET 309 310.Lcopyfault: 311 ldr r0, =EFAULT 312 str r5, [r4, #PCB_ONFAULT] 313 RESTORE_REGS 314 315 RET 316END(copyin) 317 318/* 319 * r0 = kernel space address 320 * r1 = user space address 321 * r2 = length 322 * 323 * Copies bytes from kernel space to user space 324 * 325 * We save/restore r4-r11: 326 * r4-r11 are scratch 327 */ 328 329ENTRY(copyout) 330 /* Quick exit if length is zero */ 331 teq r2, #0 332 moveq r0, #0 333 RETeq 334 335 ldr r3, .L_arm_memcpy 336 ldr r3, [r3] 337 cmp r3, #0 338 beq .Lnormale 339 ldr r3, .L_min_memcpy_size 340 ldr r3, [r3] 341 cmp r2, r3 342 blt .Lnormale 343 stmfd sp!, {r0-r2, r4, lr} 344 mov r3, r0 345 mov r0, r1 346 mov r1, r3 347 mov r3, #1 /* DST_IS_USER */ 348 ldr r4, .L_arm_memcpy 349 mov lr, pc 350 ldr pc, [r4] 351 cmp r0, #0 352 ldmfd sp!, {r0-r2, r4, lr} 353 moveq r0, #0 354 RETeq 355 356.Lnormale: 357 SAVE_REGS 358 GET_PCB(r4) 359 ldr r4, [r4] 360 361 ldr r5, [r4, #PCB_ONFAULT] 362 adr r3, .Lcopyfault 363 str r3, [r4, #PCB_ONFAULT] 364 365 PREFETCH(r0, 0) 366 PREFETCH(r1, 0) 367 368 /* 369 * If not too many bytes, take the slow path. 370 */ 371 cmp r2, #0x08 372 blt .Lcleanup 373 374 /* 375 * Align destination to word boundary. 
376 */ 377 and r6, r1, #0x3 378 ldr pc, [pc, r6, lsl #2] 379 b .Lalend 380 .word .Lalend 381 .word .Lal3 382 .word .Lal2 383 .word .Lal1 384.Lal3: ldrb r6, [r0], #1 385 sub r2, r2, #1 386 strbt r6, [r1], #1 387.Lal2: ldrb r7, [r0], #1 388 sub r2, r2, #1 389 strbt r7, [r1], #1 390.Lal1: ldrb r6, [r0], #1 391 sub r2, r2, #1 392 strbt r6, [r1], #1 393.Lalend: 394 395 /* 396 * If few bytes left, finish slow. 397 */ 398 cmp r2, #0x08 399 blt .Lcleanup 400 401 /* 402 * If source is not aligned, finish slow. 403 */ 404 ands r3, r0, #0x03 405 bne .Lcleanup 406 407 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */ 408 blt .Lcleanup8 409 410 /* 411 * Align source & destination to cacheline boundary. 412 */ 413 and r6, r1, #0x1f 414 ldr pc, [pc, r6] 415 b .Lcaligned 416 .word .Lcaligned 417 .word .Lcal28 418 .word .Lcal24 419 .word .Lcal20 420 .word .Lcal16 421 .word .Lcal12 422 .word .Lcal8 423 .word .Lcal4 424.Lcal28:ldr r6, [r0], #4 425 sub r2, r2, #4 426 strt r6, [r1], #4 427.Lcal24:ldr r7, [r0], #4 428 sub r2, r2, #4 429 strt r7, [r1], #4 430.Lcal20:ldr r6, [r0], #4 431 sub r2, r2, #4 432 strt r6, [r1], #4 433.Lcal16:ldr r7, [r0], #4 434 sub r2, r2, #4 435 strt r7, [r1], #4 436.Lcal12:ldr r6, [r0], #4 437 sub r2, r2, #4 438 strt r6, [r1], #4 439.Lcal8: ldr r7, [r0], #4 440 sub r2, r2, #4 441 strt r7, [r1], #4 442.Lcal4: ldr r6, [r0], #4 443 sub r2, r2, #4 444 strt r6, [r1], #4 445 446 /* 447 * We start with > 0x40 bytes to copy (>= 0x60 got us into this 448 * part of the code, and we may have knocked that down by as much 449 * as 0x1c getting aligned). 
450 * 451 * This loop basically works out to: 452 * do { 453 * prefetch-next-cacheline(s) 454 * bytes -= 0x20; 455 * copy cacheline 456 * } while (bytes >= 0x40); 457 * bytes -= 0x20; 458 * copy cacheline 459 */ 460.Lcaligned: 461 PREFETCH(r0, 32) 462 PREFETCH(r1, 32) 463 464 sub r2, r2, #0x20 465 466 /* Copy a cacheline */ 467 ldmia r0!, {r6-r11} 468 strt r6, [r1], #4 469 strt r7, [r1], #4 470 ldmia r0!, {r6-r7} 471 strt r8, [r1], #4 472 strt r9, [r1], #4 473 strt r10, [r1], #4 474 strt r11, [r1], #4 475 strt r6, [r1], #4 476 strt r7, [r1], #4 477 478 cmp r2, #0x40 479 bge .Lcaligned 480 481 sub r2, r2, #0x20 482 483 /* Copy a cacheline */ 484 ldmia r0!, {r6-r11} 485 strt r6, [r1], #4 486 strt r7, [r1], #4 487 ldmia r0!, {r6-r7} 488 strt r8, [r1], #4 489 strt r9, [r1], #4 490 strt r10, [r1], #4 491 strt r11, [r1], #4 492 strt r6, [r1], #4 493 strt r7, [r1], #4 494 495 cmp r2, #0x08 496 blt .Lprecleanup 497 498.Lcleanup8: 499 ldmia r0!, {r8-r9} 500 sub r2, r2, #8 501 strt r8, [r1], #4 502 strt r9, [r1], #4 503 cmp r2, #8 504 bge .Lcleanup8 505 506.Lprecleanup: 507 /* 508 * If we're done, bail. 509 */ 510 cmp r2, #0 511 beq .Lout 512 513.Lcleanup: 514 and r6, r2, #0x3 515 ldr pc, [pc, r6, lsl #2] 516 b .Lcend 517 .word .Lc4 518 .word .Lc1 519 .word .Lc2 520 .word .Lc3 521.Lc4: ldrb r6, [r0], #1 522 sub r2, r2, #1 523 strbt r6, [r1], #1 524.Lc3: ldrb r7, [r0], #1 525 sub r2, r2, #1 526 strbt r7, [r1], #1 527.Lc2: ldrb r6, [r0], #1 528 sub r2, r2, #1 529 strbt r6, [r1], #1 530.Lc1: ldrb r7, [r0], #1 531 subs r2, r2, #1 532 strbt r7, [r1], #1 533.Lcend: 534 bne .Lcleanup 535 536.Lout: 537 mov r0, #0 538 539 str r5, [r4, #PCB_ONFAULT] 540 RESTORE_REGS 541 542 RET 543END(copyout) 544#endif 545 546/* 547 * int badaddr_read_1(const uint8_t *src, uint8_t *dest) 548 * 549 * Copies a single 8-bit value from src to dest, returning 0 on success, 550 * else EFAULT if a page fault occurred. 
551 */ 552ENTRY(badaddr_read_1) 553 GET_PCB(r2) 554 ldr r2, [r2] 555 556 ldr ip, [r2, #PCB_ONFAULT] 557 adr r3, 1f 558 str r3, [r2, #PCB_ONFAULT] 559 nop 560 nop 561 nop 562 ldrb r3, [r0] 563 nop 564 nop 565 nop 566 strb r3, [r1] 567 mov r0, #0 /* No fault */ 5681: str ip, [r2, #PCB_ONFAULT] 569 RET 570END(badaddr_read_1) 571 572/* 573 * int badaddr_read_2(const uint16_t *src, uint16_t *dest) 574 * 575 * Copies a single 16-bit value from src to dest, returning 0 on success, 576 * else EFAULT if a page fault occurred. 577 */ 578ENTRY(badaddr_read_2) 579 GET_PCB(r2) 580 ldr r2, [r2] 581 582 ldr ip, [r2, #PCB_ONFAULT] 583 adr r3, 1f 584 str r3, [r2, #PCB_ONFAULT] 585 nop 586 nop 587 nop 588 ldrh r3, [r0] 589 nop 590 nop 591 nop 592 strh r3, [r1] 593 mov r0, #0 /* No fault */ 5941: str ip, [r2, #PCB_ONFAULT] 595 RET 596END(badaddr_read_2) 597 598/* 599 * int badaddr_read_4(const uint32_t *src, uint32_t *dest) 600 * 601 * Copies a single 32-bit value from src to dest, returning 0 on success, 602 * else EFAULT if a page fault occurred. 603 */ 604ENTRY(badaddr_read_4) 605 GET_PCB(r2) 606 ldr r2, [r2] 607 608 ldr ip, [r2, #PCB_ONFAULT] 609 adr r3, 1f 610 str r3, [r2, #PCB_ONFAULT] 611 nop 612 nop 613 nop 614 ldr r3, [r0] 615 nop 616 nop 617 nop 618 str r3, [r1] 619 mov r0, #0 /* No fault */ 6201: str ip, [r2, #PCB_ONFAULT] 621 RET 622END(badaddr_read_4) 623 624