support.s revision 21944
1/*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 * $FreeBSD: head/sys/i386/i386/support.s 21944 1997-01-22 06:15:27Z dyson $
 */

#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10	/* kernel data selector */
#define IDXSHIFT	10	/* address-to-PTE-index shift; see _PTmap uses below */

/*
 * Run-time selectable implementations: each word below holds the address
 * of the routine currently used for the corresponding operation (reached
 * via `jmp *'), defaulting to the generic versions.
 * NOTE(review): presumably re-pointed at the i486/i586 variants during
 * CPU identification elsewhere -- confirm against the MD startup code.
 */
	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
/*
 * Lock byte for kernel-internal FPU use: `sarb $1' shifts the low bit
 * into CF, so 0xfe (low bit clear) means free and any odd value means
 * taken.  Released by storing 0xfe again.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* %edi = buf (args shifted by the push) */
	movl	12(%esp),%ecx		/* %ecx = len */
	xorl	%eax,%eax		/* zero pattern */
	shrl	$2,%ecx			/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* remaining 0..3 bytes */
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * Unrolled store version for the 486; %edx = buf, %ecx = len throughout.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* more left; retest the size at 2: */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* dispatch on remaining count 0..3 */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#ifdef I586_CPU
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium version: clears large buffers with 8-byte FPU stores (fstl),
 * falling back to `rep stos' (intreg_i586_bzero) for small ones.
 * %edx = buf, %ecx = len.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx		/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try-lock: CF set => already taken */
	jc	intreg_i586_bzero
	smsw	%ax			/* save machine status word (CR0_TS) in %ax */
	clts
	subl	$108,%esp		/* 108 = size of an FPU save area */
	fnsave	0(%esp)			/* preserve the application's FPU state */
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz				/* %st(0) = 0.0, the 8-byte store pattern */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* restore the application's FPU state */
	addl	$108,%esp
	lmsw	%ax			/* restore saved CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the zero we loaded with fldz */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (args shifted by the push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */

/* fillw(pat, base, cnt) -- store `cnt' 16-bit copies of `pat' at `base' */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* %ax = pattern */
	movl	12(%esp),%edi		/* %edi = base */
	movl	16(%esp),%ecx		/* %ecx = word count */
	cld
	rep
	stosw
	popl	%edi
	ret

/*
 * bcopyb(src, dst, cnt) -- byte-wise copy that handles overlap by
 * copying backwards when the ranges overlap with src < dst.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards.
					   point at the last byte of each range */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std				/* string ops run downwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* restore the expected direction flag */
	ret

/* bcopy(src, dst, cnt) -- dispatch through the patchable vector */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

/* ovbcopy(src, dst, cnt) -- overlap-safe bcopy, same vector scheme */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi			/* step back to the last full longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#ifdef I586_CPU
/*
 * i586_bcopy(src, dst, cnt)
 *
 * Pentium version: large non-overlapping copies go through the FPU
 * (fildq/fistpq move 8 bytes per instruction) with a read-ahead pass to
 * prime the data cache; small or overlapping copies use the same code
 * as generic_bcopy (duplicated below).
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst?
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx			/* copy by 32-bit words, forwards */
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* then the 0..3 byte tail */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:					/* overlapping, src < dst: copy backwards */
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		/* %edi = dst */
	movl	16(%esp),%esi		/* %esi = src */
	movl	20(%esp),%ecx		/* %ecx = len */
	movl	%edi,%eax		/* return value: dst */
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3.  For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */

/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* %esi = from (kernel) */
	movl	20(%esp),%edi		/* %edi = to (user) */
	movl	24(%esp),%ebx		/* %ebx = len */
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f			/* not a 386: h/w checks write perms */
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE offset */

1:	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$0x01,_PTmap(%eax)	/* PTE Page must be VALID */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$0x07,%al		/* Pages must be VALID + USERACC + WRITABLE */
	cmpb	$0x07,%al
	je	2f

4:	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx		/* back from PTE offset to address */
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:				/* entered from i586_copyout for small copies */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.
	 * If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx		/* small copies aren't worth fastmove() */
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* wrapped */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:				/* entered from i586_copyin for small copies */
#endif
	movb	%cl,%al			/* save the low count bits */
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyin)
	/*
	 *
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU */

#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save copy + 3 spill slots */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail		/* too small for the FPU path */

	testl	$7,%esi			/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi			/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/* fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/* npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)		/* -12(%ebp) = bytes beyond this chunk */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx		/* chunk at most 1792 bytes */
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:					/* read-ahead: touch one longword per 32 bytes */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:				/* 64 bytes per iteration through the FPU */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the deferred remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:				/* copy the <64-byte tail with rep movs */
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al			/* save the low count bits */
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:				/* fault in the FPU path: restore FPU state */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp			/* drop fastmove's on-stack args, then unwind */
	popl	%ebx			/* the i586_copyin/copyout caller frame */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 * All return -1 on fault (via fusufault).
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)	/* disarm fault recovery */
	decl	%eax			/* return -1 */
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 * Return 0 on success, -1 on fault (via fusufault).
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$0x01,_PTmap(%ecx)	/* PTE Page must be VALID */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
4:	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* 486+: h/w checks write permission */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$0x01,_PTmap(%ecx)	/* PTE Page must be VALID */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

4:	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$0x01,_PTmap(%ecx)	/* PTE Page must be VALID */
	je	4f

	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

4:	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations.  If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm fault recovery */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)		/* clamped maxlen, used for *lencopied */
1:
	incl	%edx			/* pre-bias for the leading decl */
	cld

2:
	decl	%edx
	jz	3f			/* ran out before the NUL */

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt		/* stopped at the user-space limit */
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes actually copied */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 *	kernel-to-kernel variant of copyinstr: no fault recovery,
 *	no user-address bound checks.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi
					/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */
	incl	%edx			/* pre-bias for the leading decl */
	cld
1:
	decl	%edx
	jz	4f			/* ran out before the NUL */
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes actually copied */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/*
 * bcmp(b1, b2, len) -- returns 0 if the regions match, nonzero otherwise.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax		/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx			/* compare by longwords first */
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx			/* then the 0..3 byte tail */
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* mismatch: return nonzero */
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/*
 * ssdtosd(*ssdp,*sdp) -- repack a software segment descriptor into the
 * shuffled bit layout of a hardware segment descriptor.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = ssdp */
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx		/* %ecx = sdp */
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

/*
 * setjmp/longjmp: jmp_buf layout is ebx, esp, ebp, esi, edi, eip
 * at offsets 0/4/8/12/16/20.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)		/* save ebx */
	movl	%esp,4(%eax)		/* save esp */
	movl	%ebp,8(%eax)		/* save ebp */
	movl	%esi,12(%eax)		/* save esi */
	movl	%edi,16(%eax)		/* save edi */
	movl	(%esp),%edx		/* get rta */
	movl	%edx,20(%eax)		/* save eip */
	xorl	%eax,%eax		/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx		/* restore ebx */
	movl	4(%eax),%esp		/* restore esp */
	movl	8(%eax),%ebp		/* restore ebp */
	movl	12(%eax),%esi		/* restore esi */
	movl	16(%eax),%edi		/* restore edi */
	movl	20(%eax),%edx		/* get rta */
	movl	%edx,(%esp)		/* put in return frame */
	xorl	%eax,%eax		/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3			/* avoid macro for `ret' */