support.s revision 21278
1/*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 * $Id: support.s,v 1.45 1996/11/29 14:32:30 bde Exp $
 */

#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10		/* kernel data selector */
#define IDXSHIFT	10

/*
 * Run-time selectable implementations.  Each *_vector slot holds the
 * address of the routine to use; CPU-identification code elsewhere
 * presumably repoints these at the i486/i586 variants when appropriate
 * (TODO confirm against the cpu-ident code).  The indirect-jump stubs
 * below (bcopy, copyin, copyout, ...) dispatch through these slots.
 */
	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
/*
 * Byte lock for kernel use of the FPU.  0xfe is "unlocked": `sarb $1'
 * shifts it to 0xff and clears CF; once taken, the shifted value sets
 * CF for other contenders (see the sarb/jc sequences below).  Released
 * by rewriting 0xfe.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

/*
 * Plain rep-stos bzero: clear whole longwords first, then the <= 3
 * leftover bytes.  Clobbers %eax, %ecx; %edi preserved via push/pop.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len for the byte tail */
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
/*
 * Unrolled bzero for the 486, where rep-stos startup overhead is
 * relatively high.  Range reduction: 64-byte chunks, then 16, then 4,
 * then a jump table for the final 0-3 bytes.  Uses only %eax/%ecx/%edx.
 * NOTE(review): the 64/16/4-byte loops exit with `ret' when the count
 * hits exactly 0 after a subtraction (jnz falls through), so each
 * chunk-size loop can be the last thing executed.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* %ecx is 0..3 here */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#ifdef I586_CPU
/*
 * Pentium bzero: uses 8-byte FPU stores (fstl of zero) for large
 * buffers, falling back to rep-stos for small ones or when the FPU
 * cannot be borrowed.  On entry: 4(%esp) = buf, 8(%esp) = len.
 * The saved machine-status word (CR0, via smsw) is carried in %ax
 * across the FPU section so CR0_TS can be restored with lmsw.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster). However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler. Preserving the registers is
	 * complicated since we avoid it if possible at all levels. We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them. CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method. This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method. CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try-lock: CF set if already held */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0 low word (TS bit) */
	clts
	subl	$108,%esp		/* 108 = fnsave area size */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2). Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	/* Restore whoever owned the FPU, then CR0 and the lock. */
	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the zero; leave FPU stack empty */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts. Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (8(%esp) before push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */

/*
 * fillw(pat, base, cnt)
 * Store `cnt' copies of the 16-bit pattern at `base' (used e.g. for
 * console memory fills).  Clobbers %eax, %ecx.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

/*
 * bcopyb(src, dst, cnt)
 * Byte-at-a-time copy that handles overlap: copies backwards (std)
 * when dst lies within [src, src+cnt).
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* leave DF clear for the kernel */
	ret

/* Dispatch stubs: jump through the run-time selected implementation. */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Overlap-safe: forwards longword copy normally; backwards (bytes
 * first, then longwords) when dst is inside the source range.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi			/* movsl ptrs index the low byte */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#ifdef I586_CPU
/*
 * i586_bcopy(src, dst, cnt)
 * Pentium bcopy: for large (>= 1024 byte) non-overlapping copies,
 * borrows the FPU and moves 64 bytes per iteration with fildq/fistpq
 * pairs, pre-touching the source in cache-sized (DCACHE_SIZE/2 - ish)
 * chunks.  Smaller or overlapping copies use the generic code below.
 * CR0 (from smsw) is carried in %dx here, since %ax is live.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try-lock the kernel FPU */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx
	clts
	subl	$108,%esp		/* 108 = fnsave area size */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	/* Outer loop: process up to half the D-cache per pass. */
	ALIGN_TEXT
4:
	pushl	%ecx			/* 0(%esp) = bytes beyond this pass */
#define DCACHE_SIZE 8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
	/* Touch one longword per 32-byte line to pull the source into cache. */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* bytes deferred by the outer loop */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	/* Hand the FPU back, restore CR0_TS, release the lock. */
	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy. See the comments
 * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax		/* memcpy returns dst */
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */

/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

/*
 * generic_copyout: validated kernel->user copy.  Returns 0 in %eax on
 * success, EFAULT on a bad user address (either detected up front or
 * via the copyout_fault onfault handler installed in the pcb).
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses. If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTmap index */

1:	/* check PTE for each page */
	movb	_PTmap(%edx),%al
	andb	$0x07,%al	/* Pages must be VALID + USERACC + WRITABLE */
	cmpb	$0x07,%al
	je	2f

	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx			/* pop the addr argument */
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:				/* also the tail path of i586_copyout */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* clear the fault hook */
	ret

	ALIGN_TEXT
copyout_fault:				/* reached via PCB_ONFAULT on a trap */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout. Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses. If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

/*
 * generic_copyin: validated user->kernel copy.  Returns 0 on success,
 * EFAULT on a bad user address (via copyin_fault).  No PTE walk is
 * needed here: reads of kernel addresses are rejected by the range
 * check, and faults land in copyin_fault through PCB_ONFAULT.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:				/* also the tail path of i586_copyin */
#endif
	movb	%cl,%al			/* stash low len bits across movsl */
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:				/* reached via PCB_ONFAULT on a trap */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin. Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU */

#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
/*
 * FPU-assisted bulk copy used by i586_copyin/i586_copyout.  Takes over
 * the FPU (saving the owner's state into curpcb->pcb_savefpu, with a
 * copy of that area kept in this frame so cpu_switch() can still save
 * freely), then moves 64 bytes/iteration via fildq/fistpq.  Frame
 * locals at -12/-8/-4(%ebp) hold spilled len/src/dst around the
 * pcb_savefpu block copies.  NOTE(review): fastmove_fault below
 * unwinds the *caller's* (i586_copyin/i586_copyout) stack as well —
 * see the `addl $8,%esp' there matching the caller's two pushes.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save copy + 3 locals */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi		/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi		/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/* fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/* npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb. It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
	/* Outer loop: at most 1792 bytes per pass (cache-friendly chunk). */
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)		/* -12(%ebp) = bytes left after pass */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
	/* Pre-touch one longword per 32-byte line to warm the cache. */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the deferred remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	/* Integer path for the tail / small / misaligned cases. */
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* Fault during the FPU phase: restore pcb_savefpu from our copy. */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/* Unwind our frame AND the caller's pushed args/registers. */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp			/* caller's pushl %ebx / pushl %ecx */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 * Return the fetched value zero-extended in %eax, or -1 on fault
 * (via fusufault).
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx		/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax		/* unconditional failure for now */
	ret

/* Fetch an unsigned short from user space; -1 on fault. */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/* Fetch a byte from user space; -1 on fault. */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	/* Shared fault handler for all fu*/su* routines: clear hook, return -1. */
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 * Return 0 on success, -1 on fault.  On a plain 386 the PTE must be
 * probed (and trapwrite() called to simulate the fault) because the
 * CPU ignores page-level write protection in ring 0.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx		/* remove junk parameter from stack */
	movl	_curpcb,%ecx	/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/* Store an unsigned short to user space; same PTE probe on a 386. */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx		/* remove junk parameter from stack */
	movl	_curpcb,%ecx	/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/* Store a byte to user space; suibyte is an alias entry point. */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx		/* remove junk parameter from stack */
	movl	_curpcb,%ecx	/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 * copy a string from from to to, stop when a 0 character is reached.
 * return ENAMETOOLONG if string is longer than maxlen, and
 * EFAULT on protection violations. If lencopied is non-zero,
 * return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)	/* patched maxlen is used for *lencopied */
1:
	incl	%edx		/* pre-increment: loop decrements first */
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx		/* don't count the NUL against maxlen */
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 * Kernel-to-kernel variant of copyinstr: no bounds checks, no fault
 * handler; returns 0 or ENAMETOOLONG.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/*
 * bcmp(b1, b2, len): return 0 if the regions match, nonzero otherwise.
 * Compares longwords first, then the <= 3 tail bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax		/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)		/* hardcoded kernel code selector */
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/*
 * ssdtosd(*ssdp,*sdp)
 * Repack a software segment descriptor (base/limit/attributes in
 * machine-independent layout) into the hardware's scattered i386
 * segment-descriptor bit layout.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret
1474 1475/* rcr0() */ 1476ENTRY(rcr0) 1477 movl %cr0,%eax 1478 ret 1479 1480/* rcr3() */ 1481ENTRY(rcr3) 1482 movl %cr3,%eax 1483 ret 1484 1485/* void load_cr3(caddr_t cr3) */ 1486ENTRY(load_cr3) 1487 movl 4(%esp),%eax 1488 movl %eax,%cr3 1489 ret 1490 1491 1492/*****************************************************************************/ 1493/* setjump, longjump */ 1494/*****************************************************************************/ 1495 1496ENTRY(setjmp) 1497 movl 4(%esp),%eax 1498 movl %ebx,(%eax) /* save ebx */ 1499 movl %esp,4(%eax) /* save esp */ 1500 movl %ebp,8(%eax) /* save ebp */ 1501 movl %esi,12(%eax) /* save esi */ 1502 movl %edi,16(%eax) /* save edi */ 1503 movl (%esp),%edx /* get rta */ 1504 movl %edx,20(%eax) /* save eip */ 1505 xorl %eax,%eax /* return(0); */ 1506 ret 1507 1508ENTRY(longjmp) 1509 movl 4(%esp),%eax 1510 movl (%eax),%ebx /* restore ebx */ 1511 movl 4(%eax),%esp /* restore esp */ 1512 movl 8(%eax),%ebp /* restore ebp */ 1513 movl 12(%eax),%esi /* restore esi */ 1514 movl 16(%eax),%edi /* restore edi */ 1515 movl 20(%eax),%edx /* get rta */ 1516 movl %edx,(%esp) /* put in return frame */ 1517 xorl %eax,%eax /* return(1); */ 1518 incl %eax 1519 ret 1520 1521/* 1522 * Here for doing BB-profiling (gcc -a). 1523 * We rely on the "bbset" instead, but need a dummy function. 1524 */ 1525NON_GPROF_ENTRY(__bb_init_func) 1526 movl 4(%esp),%eax 1527 movl $1,(%eax) 1528 .byte 0xc3 /* avoid macro for `ret' */ 1529