/* support.s revision 29041 */
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: support.s,v 1.56 1997/08/09 00:02:44 dyson Exp $
 */

#include "npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10	/* kernel data selector */
#define KCSEL		0x8	/* kernel code selector */
#define IDXSHIFT	10

	.data
	/*
	 * Indirect-call vectors for the bcopy/bzero/copyin/copyout family.
	 * They start out pointing at the generic implementations and may be
	 * repointed (e.g. at the i586 FPU-register versions) elsewhere;
	 * bcopy(), copyin() etc. below simply jump through them.
	 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
	/*
	 * Lock byte guarding kernel use of the FPU.  0xfe means "free":
	 * `sarb $1' on it shifts the low bit out into CF, so CF clear
	 * after the shift means we acquired the lock (see i586_bzero).
	 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 *
 * generic_bzero: longword `rep stosl' for the bulk, `rep stosb' for the
 * 0-3 trailing bytes.  Clobbers %eax, %ecx, %edx (caller-saved only).
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len; recover the low 2 bits */
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
/*
 * i486_bzero: unrolled store version, range-reduced 64 -> 16 -> 4 -> 1
 * byte chunks.  %edx = cursor, %ecx = remaining count, %eax = 0.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* count hit exactly 0: done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret
/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* dispatch on remaining count (0-3) */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && NNPX > 0
/*
 * i586_bzero: zero via 8-byte FPU stores (fstl of +0.0) for large
 * buffers, falling back to `rep stos' for small ones.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0_TS (smsw is cheaper than mov %cr0) */
	clts
	subl	$108,%esp		/* 108 = size of an fnsave area */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* case 1/2: restore app's FPU state */
	addl	$108,%esp
	lmsw	%ax			/* restore saved CR0_TS */
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstpl	%st(0)			/* case 4: just pop our 0.0 */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (8(%esp) + pushed %edi) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * fillw(pat, base, cnt)
 * Store `cnt' copies of the 16-bit pattern `pat' at `base'.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

/*
 * bcopyb(src, dst, cnt)
 * Byte-granular copy that handles overlapping regions (copies backwards
 * when src < dst and the regions overlap).
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std				/* descending string ops */
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* restore the expected direction flag */
	ret

/* bcopy(src, dst, cnt): dispatch through the boot-selected vector */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

/* ovbcopy(src, dst, cnt): overlap-safe bcopy, via its own vector */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi			/* point at the low byte of each word */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && NNPX > 0
/*
 * i586_bcopy(src, dst, cnt)
 * Large forward copies go through the FPU (64 bytes per fildq/fistpq
 * burst) after a read pass that pre-warms the L1 data cache; small or
 * backward copies use the generic `rep movs' code duplicated below.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx			/* save CR0_TS in %dx */
	clts
	subl	$108,%esp		/* fnsave area for the app's state */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)		/* 0(%esp) = bytes left after this pass */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* touch one word per cache line to pull the source into L1 */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* bytes deferred at label 2 above */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax		/* memcpy returns the dst pointer */
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* always forwards; overlap unsupported */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3.  For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

/*
 * generic_copyout: returns 0 on success, EFAULT on a bad user address.
 * curpcb->pcb_onfault is armed for the duration so a page-fault inside
 * the copy resumes at copyout_fault.
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* args are at 16/20/24 after 3 pushes */
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE offset */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx		/* recover the page's virtual address */
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx			/* pop the argument */
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm the fault handler */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyout: as generic_copyout, but large copies (>= 1024 bytes)
 * go through fastmove(); small ones jump into slow_copyout above.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

/*
 * generic_copyin: returns 0 on success, EFAULT on a bad user address,
 * using curpcb->pcb_onfault the same way as generic_copyout.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault		/* wrapped */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al			/* save low len bits across the shift */
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyin: as generic_copyin, but large copies (>= 1024 bytes) go
 * through fastmove(); small ones jump into slow_copyin above.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */

#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spill slots */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail		/* too small for the FPU path */

	testl	$7,%esi			/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi			/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*    npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx		/* cap each pass; see DCACHE_SIZE above */
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)		/* -12(%ebp) = bytes left after pass */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	/* touch one word per cache line to pull the source into L1 */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the deferred remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* put the saved FPU state back before bailing out */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/*
	 * Unwind fastmove's frame, then skip our return address and the
	 * pushed length argument (addl $8) so the pops below restore the
	 * i586_copyin/i586_copyout caller's registers and we return
	 * EFAULT directly to *its* caller.
	 */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 * Each returns the fetched value, or -1 on fault (see fusufault).
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx		/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax		/* unconditional failure, see above */
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	/* shared fault exit for the fu*/su* family: clear onfault, return -1 */
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 * Each returns 0 on success, -1 on fault.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax		/* keep the user address in %eax */
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx			/* pre-bias; decremented before use */
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx			/* don't count the NUL's slot twice */
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt		/* ran off the end of user space */
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
/*
 * copystr: kernel-to-kernel string copy; same contract as copyinstr
 * but with no user-address checks and no fault handler.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx			/* pre-bias; decremented before use */
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx			/* don't count the NUL's slot twice */
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/*
 * bcmp(b1, b2, len): returns 0 if equal, non-zero otherwise
 * (only equality is meaningful, unlike memcmp).
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* mismatch: return non-zero */
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/*
 * ssdtosd(*ssdp,*sdp)
 * Convert a software segment descriptor to the packed hardware
 * segment-descriptor layout.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret

/* rcr4() */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret

/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3			/* avoid macro for `ret' */