/* support.s revision 26267 */
1/*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	$Id: support.s,v 1.52 1997/04/26 11:45:21 peter Exp $
 */

#include "npx.h"
#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine/smpasm.h>

#include "assym.s"

#define KDSEL		0x10			/* kernel data selector */
#define KCSEL		0x8			/* kernel code selector */
#define IDXSHIFT	10

/*
 * Run-time selected implementation vectors: each holds a pointer to the
 * routine chosen for the detected CPU class.  bcopy()/copyin()/copyout()
 * below dispatch through these with an indirect jump.  Note that _bzero
 * itself is the vector (C code calls through the function pointer).
 */
	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
/*
 * Byte lock for kernel use of the FPU: 0xfe means free.  `sarb $1' on it
 * atomically tests-and-takes it (CF set => was already held).
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

/*
 * Plain rep-string bzero: longword stores, then the 0-3 leftover bytes.
 * In:  4(%esp) = buf, 8(%esp) = len.  Clobbers %eax, %ecx, direction flag.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
/*
 * Unrolled bzero tuned for the 486, where rep-string startup is costly.
 * Falls through 64/16/4-byte chunk loops, finishing the last 0-3 bytes
 * via a jump table.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
/*
 * Pentium bzero using 8-byte FPU stores (fstl of 0.0) for large buffers.
 * Must borrow the FPU: sees kernel_fpu_lock, CR0.TS (via smsw/lmsw) and,
 * when an application owns the FPU (_npxproc != 0), saves/restores the
 * full 108-byte FPU state on the stack.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero	/* already held - fall back */
	smsw	%ax			/* remember CR0 (incl. TS) in %ax */
	clts
	subl	$108,%esp		/* room for the 87-byte+env FPU state */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* give the application its FPU back */
	addl	$108,%esp
	lmsw	%ax			/* restore saved CR0.TS */
	movb	$0xfe,kernel_fpu_lock	/* release lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the 0.0 we loaded */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 && !SMP */

/* fillw(pat, base, cnt) -- store cnt 16-bit copies of pat at base */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

/*
 * bcopyb(src, dst, cnt) -- byte-at-a-time copy, safe for overlap;
 * copies backwards (std) when the ranges overlap with src < dst.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

/* bcopy(src, dst, cnt) -- dispatch to the CPU-selected implementation */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

/* ovbcopy(src, dst, cnt) -- overlap-safe bcopy via its own vector */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
/*
 * Pentium bcopy: for non-overlapping copies of >= 1024 bytes, borrow the
 * FPU and move 64 bytes per iteration with fildq/fistpq; reads ahead one
 * cache line per 32 bytes to prime the cache.  Otherwise falls through to
 * a rep-movs copy identical to generic_bcopy.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx				/* preserve CR0.TS in %dx */
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)			/* saved total -= this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
	/* touch one word per 32-byte line to pull the source into cache */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* remaining total count */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 && !SMP */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax			/* memcpy returns dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
618 */ 619 620/* copyout(from_kernel, to_user, len) */ 621ENTRY(copyout) 622 MEXITCOUNT 623 jmp *_copyout_vector 624 625ENTRY(generic_copyout) 626 GETCURPCB(%eax) 627 movl $copyout_fault,PCB_ONFAULT(%eax) 628 pushl %esi 629 pushl %edi 630 pushl %ebx 631 movl 16(%esp),%esi 632 movl 20(%esp),%edi 633 movl 24(%esp),%ebx 634 testl %ebx,%ebx /* anything to do? */ 635 jz done_copyout 636 637 /* 638 * Check explicitly for non-user addresses. If 486 write protection 639 * is being used, this check is essential because we are in kernel 640 * mode so the h/w does not provide any protection against writing 641 * kernel addresses. 642 */ 643 644 /* 645 * First, prevent address wrapping. 646 */ 647 movl %edi,%eax 648 addl %ebx,%eax 649 jc copyout_fault 650/* 651 * XXX STOP USING VM_MAXUSER_ADDRESS. 652 * It is an end address, not a max, so every time it is used correctly it 653 * looks like there is an off by one error, and of course it caused an off 654 * by one error in several places. 655 */ 656 cmpl $VM_MAXUSER_ADDRESS,%eax 657 ja copyout_fault 658 659#if defined(I386_CPU) 660 661#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 662 cmpl $CPUCLASS_386,_cpu_class 663 jne 3f 664#endif 665/* 666 * We have to check each PTE for user write permission. 667 * The checking may cause a page fault, so it is important to set 668 * up everything for return via copyout_fault before here. 
669 */ 670 /* compute number of pages */ 671 movl %edi,%ecx 672 andl $PAGE_MASK,%ecx 673 addl %ebx,%ecx 674 decl %ecx 675 shrl $IDXSHIFT+2,%ecx 676 incl %ecx 677 678 /* compute PTE offset for start address */ 679 movl %edi,%edx 680 shrl $IDXSHIFT,%edx 681 andb $0xfc,%dl 682 6831: 684 /* check PTE for each page */ 685 leal _PTmap(%edx),%eax 686 shrl $IDXSHIFT,%eax 687 andb $0xfc,%al 688 testb $PG_V,_PTmap(%eax) /* PTE page must be valid */ 689 je 4f 690 movb _PTmap(%edx),%al 691 andb $PG_V|PG_RW|PG_U,%al /* page must be valid and user writable */ 692 cmpb $PG_V|PG_RW|PG_U,%al 693 je 2f 694 6954: 696 /* simulate a trap */ 697 pushl %edx 698 pushl %ecx 699 shll $IDXSHIFT,%edx 700 pushl %edx 701 call _trapwrite /* trapwrite(addr) */ 702 popl %edx 703 popl %ecx 704 popl %edx 705 706 testl %eax,%eax /* if not ok, return EFAULT */ 707 jnz copyout_fault 708 7092: 710 addl $4,%edx 711 decl %ecx 712 jnz 1b /* check next page */ 713#endif /* I386_CPU */ 714 715 /* bcopy(%esi, %edi, %ebx) */ 7163: 717 movl %ebx,%ecx 718 719#if defined(I586_CPU) && NNPX > 0 && !defined(SMP) 720 ALIGN_TEXT 721slow_copyout: 722#endif 723 shrl $2,%ecx 724 cld 725 rep 726 movsl 727 movb %bl,%cl 728 andb $3,%cl 729 rep 730 movsb 731 732done_copyout: 733 popl %ebx 734 popl %edi 735 popl %esi 736 xorl %eax,%eax 737 GETCURPCB(%edx) 738 movl %eax,PCB_ONFAULT(%edx) 739 ret 740 741 ALIGN_TEXT 742copyout_fault: 743 popl %ebx 744 popl %edi 745 popl %esi 746 GETCURPCB(%edx) 747 movl $0,PCB_ONFAULT(%edx) 748 movl $EFAULT,%eax 749 ret 750 751#if defined(I586_CPU) && NNPX > 0 && !defined(SMP) 752ENTRY(i586_copyout) 753 /* 754 * Duplicated from generic_copyout. Could be done a bit better. 755 */ 756 movl _curpcb,%eax 757 movl $copyout_fault,PCB_ONFAULT(%eax) 758 pushl %esi 759 pushl %edi 760 pushl %ebx 761 movl 16(%esp),%esi 762 movl 20(%esp),%edi 763 movl 24(%esp),%ebx 764 testl %ebx,%ebx /* anything to do? */ 765 jz done_copyout 766 767 /* 768 * Check explicitly for non-user addresses. 
If 486 write protection 769 * is being used, this check is essential because we are in kernel 770 * mode so the h/w does not provide any protection against writing 771 * kernel addresses. 772 */ 773 774 /* 775 * First, prevent address wrapping. 776 */ 777 movl %edi,%eax 778 addl %ebx,%eax 779 jc copyout_fault 780/* 781 * XXX STOP USING VM_MAXUSER_ADDRESS. 782 * It is an end address, not a max, so every time it is used correctly it 783 * looks like there is an off by one error, and of course it caused an off 784 * by one error in several places. 785 */ 786 cmpl $VM_MAXUSER_ADDRESS,%eax 787 ja copyout_fault 788 789 /* bcopy(%esi, %edi, %ebx) */ 7903: 791 movl %ebx,%ecx 792 /* 793 * End of duplicated code. 794 */ 795 796 cmpl $1024,%ecx 797 jb slow_copyout 798 799 pushl %ecx 800 call _fastmove 801 addl $4,%esp 802 jmp done_copyout 803#endif /* I586_CPU && NNPX > 0 */ 804 805/* copyin(from_user, to_kernel, len) */ 806ENTRY(copyin) 807 MEXITCOUNT 808 jmp *_copyin_vector 809 810ENTRY(generic_copyin) 811 GETCURPCB(%eax) 812 movl $copyin_fault,PCB_ONFAULT(%eax) 813 pushl %esi 814 pushl %edi 815 movl 12(%esp),%esi /* caddr_t from */ 816 movl 16(%esp),%edi /* caddr_t to */ 817 movl 20(%esp),%ecx /* size_t len */ 818 819 /* 820 * make sure address is valid 821 */ 822 movl %esi,%edx 823 addl %ecx,%edx 824 jc copyin_fault 825 cmpl $VM_MAXUSER_ADDRESS,%edx 826 ja copyin_fault 827 828#if defined(I586_CPU) && NNPX > 0 && !defined(SMP) 829 ALIGN_TEXT 830slow_copyin: 831#endif 832 movb %cl,%al 833 shrl $2,%ecx /* copy longword-wise */ 834 cld 835 rep 836 movsl 837 movb %al,%cl 838 andb $3,%cl /* copy remaining bytes */ 839 rep 840 movsb 841 842#if defined(I586_CPU) && NNPX > 0 && !defined(SMP) 843 ALIGN_TEXT 844done_copyin: 845#endif 846 popl %edi 847 popl %esi 848 xorl %eax,%eax 849 GETCURPCB(%edx) 850 movl %eax,PCB_ONFAULT(%edx) 851 ret 852 853 ALIGN_TEXT 854copyin_fault: 855 popl %edi 856 popl %esi 857 GETCURPCB(%edx) 858 movl $0,PCB_ONFAULT(%edx) 859 movl $EFAULT,%eax 860 ret 

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
/* i586_copyin: like generic_copyin but uses fastmove() for >= 1K */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 && !SMP */

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * FPU-assisted copy helper shared by i586_copyin/i586_copyout.  Takes the
 * FPU over from the current process (saving its state in the pcb), streams
 * 64-byte blocks with fildq/fistpq, then copies the tail with rep movs.
 * Faults are unwound through fastmove_fault/fastmove_tail_fault, which
 * return to the *caller's caller* (i586_copyin/copyout frame) with EFAULT.
 * Only worthwhile when both src and dst are 8-byte aligned and len > 63.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spill slots */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)		/* -12(%ebp) = count beyond this chunk */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx		/* cap chunk so src+dst fit the cache */
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
	/* touch one word per 32-byte line to pull the source into cache */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* fold back the deferred count */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* put the saved FPU state back in the pcb before bailing out */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/* unwind our frame, the caller's args, and the caller's frame */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 && !SMP */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 * Each returns the fetched value, or -1 (via fusufault) on a bad address.
 */
ENTRY(fuword)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
1105 */ 1106ALTENTRY(suswintr) 1107ENTRY(fuswintr) 1108 movl $-1,%eax 1109 ret 1110 1111ENTRY(fusword) 1112 GETCURPCB(%ecx) 1113 movl $fusufault,PCB_ONFAULT(%ecx) 1114 movl 4(%esp),%edx 1115 1116 cmpl $VM_MAXUSER_ADDRESS-2,%edx 1117 ja fusufault 1118 1119 movzwl (%edx),%eax 1120 movl $0,PCB_ONFAULT(%ecx) 1121 ret 1122 1123ENTRY(fubyte) 1124 GETCURPCB(%ecx) 1125 movl $fusufault,PCB_ONFAULT(%ecx) 1126 movl 4(%esp),%edx 1127 1128 cmpl $VM_MAXUSER_ADDRESS-1,%edx 1129 ja fusufault 1130 1131 movzbl (%edx),%eax 1132 movl $0,PCB_ONFAULT(%ecx) 1133 ret 1134 1135 ALIGN_TEXT 1136fusufault: 1137 GETCURPCB(%ecx) 1138 xorl %eax,%eax 1139 movl %eax,PCB_ONFAULT(%ecx) 1140 decl %eax 1141 ret 1142 1143/* 1144 * su{byte,sword,word}: write a byte (word, longword) to user memory 1145 */ 1146ENTRY(suword) 1147 GETCURPCB(%ecx) 1148 movl $fusufault,PCB_ONFAULT(%ecx) 1149 movl 4(%esp),%edx 1150 1151#if defined(I386_CPU) 1152 1153#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 1154 cmpl $CPUCLASS_386,_cpu_class 1155 jne 2f /* we only have to set the right segment selector */ 1156#endif /* I486_CPU || I586_CPU || I686_CPU */ 1157 1158 /* XXX - page boundary crossing is still not handled */ 1159 movl %edx,%eax 1160 shrl $IDXSHIFT,%edx 1161 andb $0xfc,%dl 1162 1163 leal _PTmap(%edx),%ecx 1164 shrl $IDXSHIFT,%ecx 1165 andb $0xfc,%cl 1166 testb $PG_V,_PTmap(%ecx) /* PTE page must be valid */ 1167 je 4f 1168 movb _PTmap(%edx),%dl 1169 andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ 1170 cmpb $PG_V|PG_RW|PG_U,%dl 1171 je 1f 1172 11734: 1174 /* simulate a trap */ 1175 pushl %eax 1176 call _trapwrite 1177 popl %edx /* remove junk parameter from stack */ 1178 testl %eax,%eax 1179 jnz fusufault 11801: 1181 movl 4(%esp),%edx 1182#endif 1183 11842: 1185 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ 1186 ja fusufault 1187 1188 movl 8(%esp),%eax 1189 movl %eax,(%edx) 1190 xorl %eax,%eax 1191 GETCURPCB(%ecx) 1192 movl %eax,PCB_ONFAULT(%ecx) 1193 ret 1194 
/* store a 16-bit word to user memory; 0 on success, -1 on fault */
ENTRY(susword)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	GETCURPCB(%ecx)				/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/* store a byte to user memory; 0 on success, -1 on fault */
ALTENTRY(suibyte)
ENTRY(subyte)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	GETCURPCB(%ecx)				/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	GETCURPCB(%ecx)
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	GETCURPCB(%ecx)
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
/*
 * Kernel-to-kernel string copy: like copyinstr but with no user-address
 * checks and no fault handler.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/* bcmp(s1, s2, len): returns 0 if equal, nonzero otherwise */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx				/* compare by 32-bit words */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx				/* then the 0-3 leftover bytes */
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/* ssdtosd(*ssdp,*sdp) */
/*
 * Convert a machine-independent "soft" segment descriptor (*ssdp) into
 * the i386 hardware descriptor layout, storing the two result longwords
 * at *sdp.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjump, longjump */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3			/* avoid macro for `ret' */