/*	$NetBSD: copy.S,v 1.35 2022/12/18 07:53:30 skrll Exp $	*/

/*
 * Copyright (c) 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "assym.h"

#include <sys/errno.h>
#include <sys/syscall.h>

#include <machine/asm.h>
#include <machine/frameasm.h>

#define GET_CURPCB(reg)	\
	movq	CPUVAR(CURLWP),reg; \
	movq	L_PCB(reg),reg

/*
 * These are arranged so that the abnormal case is a forwards
 * conditional branch - which will be predicted not-taken by
 * both Intel and AMD processors.
 */
#define DEFERRED_SWITCH_CHECK \
	CHECK_DEFERRED_SWITCH			; \
	jnz	99f				; \
98:

#define DEFERRED_SWITCH_CALL \
99:						; \
	call	_C_LABEL(do_pmap_load)		; \
	jmp	98b

/*
 * The following primitives are to copy regions of memory.
 * Label must be before all copy functions.
 */
	.text

x86_copyfunc_start:	.globl	x86_copyfunc_start

/*
 * Handle deferred pmap switch.  We must re-enable preemption without
 * making a function call, so that the program counter is visible to
 * cpu_kpreempt_exit().  It can then know if it needs to restore the
 * pmap on returning, because a preemption occurred within one of the
 * copy functions.
 */
ENTRY(do_pmap_load)
	pushq	%rbp
	movq	%rsp,%rbp
	pushq	%rdi
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%rbx
	movq	CPUVAR(CURLWP),%rbx
1:
	incl	L_NOPREEMPT(%rbx)
	call	_C_LABEL(pmap_load)
	decl	L_NOPREEMPT(%rbx)
	jnz	2f
	cmpl	$0,L_DOPREEMPT(%rbx)
	jz	2f
	xorq	%rdi,%rdi
	call	_C_LABEL(kpreempt)
2:
	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
	jnz	1b
	popq	%rbx
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	leaveq
	ret
END(do_pmap_load)

/*
 * Copy routines from and to userland, plus a few more.  See the
 * section 9 manpages for info.  Some cases can be optimized more.
 *
 * I wonder if it's worthwhile to make these use SSE2 registers?
 * (dsl) Not from info I've read from the AMD guides.
 *
 * Also note that the setup time for 'rep movs' is horrid - especially on P4
 * netburst - but on my AMD X2 it manages one copy (read+write) per clock
 * which can be achieved with a code loop, but is probably impossible to beat.
 * However the use of 'rep movsb' for the final bytes should be killed.
 *
 * Newer Intel cpus have a much lower setup time, and may (someday)
 * be able to do cache-line size copies....
 */
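
/*
 * For orientation, typical C-level use of the copy(9) primitives below
 * looks roughly like this (an illustrative sketch only; the struct and
 * variable names are made up):
 *
 *	struct foo kf;
 *	int error;
 *
 *	error = copyin(uaddr, &kf, sizeof(kf));		-- user -> kernel
 *	if (error != 0)
 *		return error;
 *	...
 *	error = copyout(&kf, uaddr, sizeof(kf));	-- kernel -> user
 */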

/*
 * int kcopy(const void *from, void *to, size_t len);
 * Copy len bytes from and to kernel memory, and abort on fault.
 */
ENTRY(kcopy)
	xchgq	%rdi,%rsi
	movq	%rdx,%rcx
.Lkcopy_start:
	movq	%rdi,%rax
	subq	%rsi,%rax
	cmpq	%rcx,%rax		/* overlapping? */
	jb	1f
	/* nope, copy forward */
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq

	movq	%rdx,%rcx
	andl	$7,%ecx			/* any bytes left? */
	rep
	movsb

	xorq	%rax,%rax
	ret

/*
 * Using 'rep movs' to copy backwards is not as fast as for forwards copies
 * and ought not be done when the copy doesn't actually overlap.
 * However kcopy() isn't used anywhere that looks even vaguely
 * performance-critical.
 * I'm also not sure it is ever asked to do overlapping copies!
 */

1:	addq	%rcx,%rdi		/* copy backward */
	addq	%rcx,%rsi
	std
	andq	$7,%rcx			/* any fractional bytes? */
	decq	%rdi
	decq	%rsi
	rep
	movsb
	movq	%rdx,%rcx		/* copy remainder by 64-bit words */
	shrq	$3,%rcx
	subq	$7,%rsi
	subq	$7,%rdi
	rep
	movsq
	cld
.Lkcopy_end:
	xorq	%rax,%rax
	ret
END(kcopy)

ENTRY(copyout)
	DEFERRED_SWITCH_CHECK

	xchgq	%rdi,%rsi		/* kernel address to %rsi, user to %rdi */
	movq	%rdx,%rax		/* save transfer length (bytes) */

	addq	%rdi,%rdx		/* end address to %rdx */
	jc	_C_LABEL(copy_efault)	/* jump if wraps */
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	/* jump if end in kernel space */

	SMAP_DISABLE
.Lcopyout_start:
	movq	%rax,%rcx		/* length */
	shrq	$3,%rcx			/* count of 8-byte words */
	rep
	movsq				/* copy from %rsi to %rdi */
	movb	%al,%cl
	andb	$7,%cl			/* remaining number of bytes */
	rep
	movsb				/* copy remaining bytes */
.Lcopyout_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(copyout)

ENTRY(copyin)
	DEFERRED_SWITCH_CHECK

	xchgq	%rdi,%rsi
	movq	%rdx,%rax

	addq	%rsi,%rdx		/* check source address not wrapped */
	jc	_C_LABEL(copy_efault)
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	/* jump if end in kernel space */

	SMAP_DISABLE
.Lcopyin_start:
3:	/* bcopy(%rsi, %rdi, %rax); */
	movq	%rax,%rcx
	shrq	$3,%rcx
	rep
	movsq
	movb	%al,%cl
	andb	$7,%cl
	rep
	movsb
.Lcopyin_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(copyin)

ENTRY(copy_efault)
	movq	$EFAULT,%rax
	ret
END(copy_efault)

ENTRY(kcopy_fault)
	cld
	ret
END(kcopy_fault)

ENTRY(copy_fault)
	SMAP_ENABLE
	ret
END(copy_fault)

ENTRY(copyoutstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Get min(%rdx, VM_MAXUSER_ADDRESS-%rdi).
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rdi,%rax
	jc	_C_LABEL(copystr_efault)
	cmpq	%rdx,%rax
	jae	1f
	movq	%rax,%rdx
	movq	%rax,%r8
1:	incq	%rdx

	SMAP_DISABLE
.Lcopyoutstr_start:
1:	decq	%rdx
	jz	2f
	lodsb
	stosb
	testb	%al,%al
	jnz	1b
.Lcopyoutstr_end:
	SMAP_ENABLE

	/* Success -- 0 byte reached. */
	decq	%rdx
	xorq	%rax,%rax
	jmp	copystr_return

2:	/* rdx is zero -- return EFAULT or ENAMETOOLONG. */
	SMAP_ENABLE
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rdi
	jae	_C_LABEL(copystr_efault)
	movq	$ENAMETOOLONG,%rax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL
END(copyoutstr)

ENTRY(copyinstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Get min(%rdx, VM_MAXUSER_ADDRESS-%rsi).
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rsi,%rax
	jc	_C_LABEL(copystr_efault)
	cmpq	%rdx,%rax
	jae	1f
	movq	%rax,%rdx
	movq	%rax,%r8
1:	incq	%rdx

	SMAP_DISABLE
.Lcopyinstr_start:
1:	decq	%rdx
	jz	2f
	lodsb
	stosb
	testb	%al,%al
	jnz	1b
.Lcopyinstr_end:
	SMAP_ENABLE

	/* Success -- 0 byte reached. */
	decq	%rdx
	xorq	%rax,%rax
	jmp	copystr_return

2:	/* rdx is zero -- return EFAULT or ENAMETOOLONG. */
	SMAP_ENABLE
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rsi
	jae	_C_LABEL(copystr_efault)
	movq	$ENAMETOOLONG,%rax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL
END(copyinstr)

ENTRY(copystr_efault)
	movl	$EFAULT,%eax
	jmp	copystr_return
END(copystr_efault)

ENTRY(copystr_fault)
	SMAP_ENABLE
copystr_return:
	/* Set *lencopied and return %eax. */
	testq	%r9,%r9
	jz	8f
	subq	%rdx,%r8
	movq	%r8,(%r9)
8:	ret
END(copystr_fault)

/**************************************************************************/

#define UFETCHSTORE_PROLOGUE(x) \
	movq	$VM_MAXUSER_ADDRESS-x,%r11	; \
	cmpq	%r11,%rdi			; \
	ja	_C_LABEL(copy_efault)

/* LINTSTUB: int _ufetch_8(const uint8_t *uaddr, uint8_t *valp); */
ENTRY(_ufetch_8)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(1)

	SMAP_DISABLE
.L_ufetch_8_start:
	movb	(%rdi),%al
.L_ufetch_8_end:
	SMAP_ENABLE

	movb	%al,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_8)

/* LINTSTUB: int _ufetch_16(const uint16_t *uaddr, uint16_t *valp); */
ENTRY(_ufetch_16)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(2)

	SMAP_DISABLE
.L_ufetch_16_start:
	movw	(%rdi),%ax
.L_ufetch_16_end:
	SMAP_ENABLE

	movw	%ax,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_16)

/* LINTSTUB: int _ufetch_32(const uint32_t *uaddr, uint32_t *valp); */
ENTRY(_ufetch_32)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(4)

	SMAP_DISABLE
.L_ufetch_32_start:
	movl	(%rdi),%eax
.L_ufetch_32_end:
	SMAP_ENABLE

	movl	%eax,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_32)

/* LINTSTUB: int _ufetch_64(const uint64_t *uaddr, uint64_t *valp); */
ENTRY(_ufetch_64)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(8)

	SMAP_DISABLE
.L_ufetch_64_start:
	movq	(%rdi),%rax
.L_ufetch_64_end:
	SMAP_ENABLE

	movq	%rax,(%rsi)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_64)

/* LINTSTUB: int _ustore_8(uint8_t *uaddr, uint8_t val); */
ENTRY(_ustore_8)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(1)

	SMAP_DISABLE
.L_ustore_8_start:
	movb	%sil,(%rdi)
.L_ustore_8_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_8)

/* LINTSTUB: int _ustore_16(uint16_t *uaddr, uint16_t val); */
ENTRY(_ustore_16)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(2)

	SMAP_DISABLE
.L_ustore_16_start:
	movw	%si,(%rdi)
.L_ustore_16_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_16)

/* LINTSTUB: int _ustore_32(uint32_t *uaddr, uint32_t val); */
ENTRY(_ustore_32)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(4)

	SMAP_DISABLE
.L_ustore_32_start:
	movl	%esi,(%rdi)
.L_ustore_32_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_32)

/* LINTSTUB: int _ustore_64(uint64_t *uaddr, uint64_t val); */
ENTRY(_ustore_64)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(8)

	SMAP_DISABLE
.L_ustore_64_start:
	movq	%rsi,(%rdi)
.L_ustore_64_end:
	SMAP_ENABLE

	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_64)

/**************************************************************************/

/*
 * Compare-and-swap a 64-bit integer in user space.
 *
 * int	_ucas_64(volatile uint64_t *uptr, uint64_t old, uint64_t new,
 *		 uint64_t *ret);
 */
ENTRY(_ucas_64)
	DEFERRED_SWITCH_CHECK
	/* Fail if kernel-space */
	movq	$VM_MAXUSER_ADDRESS-8,%r8
	cmpq	%r8,%rdi
	ja	_C_LABEL(ucas_efault)
	movq	%rsi,%rax

	SMAP_DISABLE
.Lucas64_start:
	/* Perform the CAS */
	lock
	cmpxchgq %rdx,(%rdi)
.Lucas64_end:
	SMAP_ENABLE

	/*
	 * Note: %rax is "old" value.
	 * Set the return values.
	 */
	movq	%rax,(%rcx)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ucas_64)

/*
 * int	_ucas_32(volatile uint32_t *uptr, uint32_t old, uint32_t new,
 *		 uint32_t *ret);
 */
ENTRY(_ucas_32)
	DEFERRED_SWITCH_CHECK
	/* Fail if kernel-space */
	movq	$VM_MAXUSER_ADDRESS-4,%r8
	cmpq	%r8,%rdi
	ja	_C_LABEL(ucas_efault)
	movl	%esi,%eax

	SMAP_DISABLE
.Lucas32_start:
	/* Perform the CAS */
	lock
	cmpxchgl %edx,(%rdi)
.Lucas32_end:
	SMAP_ENABLE

	/*
	 * Note: %eax is "old" value.
	 * Set the return values.
	 */
	movl	%eax,(%rcx)
	xorq	%rax,%rax
	ret
	DEFERRED_SWITCH_CALL
END(_ucas_32)

ENTRY(ucas_efault)
	movq	$EFAULT,%rax
	ret
END(ucas_efault)

ENTRY(ucas_fault)
	SMAP_ENABLE
	ret
END(ucas_fault)

/*
 * Label must be after all copy functions.
 */
x86_copyfunc_end:	.globl	x86_copyfunc_end

/*
 * Fault table of copy functions for trap().
 */
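/*
 * Each entry below is three quads: the start and end of one of the
 * instruction ranges above, and the handler that execution resumes at
 * if a page fault is taken inside that range.  A zero start address
 * terminates the table.  Conceptually (an illustrative sketch, not the
 * actual trap() code):
 *
 *	for (p = onfault_table; p->start != 0; p++)
 *		if (p->start <= rip && rip < p->end)
 *			resume at p->handler;
 */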
	.section ".rodata"
	.globl _C_LABEL(onfault_table)

_C_LABEL(onfault_table):
	.quad .Lcopyin_start
	.quad .Lcopyin_end
	.quad _C_LABEL(copy_fault)

	.quad .Lcopyout_start
	.quad .Lcopyout_end
	.quad _C_LABEL(copy_fault)

	.quad .Lkcopy_start
	.quad .Lkcopy_end
	.quad _C_LABEL(kcopy_fault)

	.quad .Lcopyoutstr_start
	.quad .Lcopyoutstr_end
	.quad _C_LABEL(copystr_fault)

	.quad .Lcopyinstr_start
	.quad .Lcopyinstr_end
	.quad _C_LABEL(copystr_fault)

	.quad .Lucas64_start
	.quad .Lucas64_end
	.quad _C_LABEL(ucas_fault)

	.quad .Lucas32_start
	.quad .Lucas32_end
	.quad _C_LABEL(ucas_fault)

	.quad .L_ufetch_8_start
	.quad .L_ufetch_8_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ufetch_16_start
	.quad .L_ufetch_16_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ufetch_32_start
	.quad .L_ufetch_32_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ufetch_64_start
	.quad .L_ufetch_64_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_8_start
	.quad .L_ustore_8_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_16_start
	.quad .L_ustore_16_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_32_start
	.quad .L_ustore_32_end
	.quad _C_LABEL(copy_fault)

	.quad .L_ustore_64_start
	.quad .L_ustore_64_end
	.quad _C_LABEL(copy_fault)

	.quad 0	/* terminate */

	.text