support.s revision 26812
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: support.s,v 1.53 1997/05/29 05:11:10 peter Exp $
 */

#include "npx.h"
#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10		/* kernel data selector */
#define KCSEL		0x8		/* kernel code selector */
#define IDXSHIFT	10

	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
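	/*
	 * Roughly: kernel_fpu_lock (declared in the data section above) is a
	 * one-byte try-lock.  `sarb $1' on the unlocked value 0xfe shifts out
	 * a 0 bit (carry clear, lock taken, the byte becomes 0xff); on the
	 * locked value 0xff it shifts out a 1 bit (carry set, so we fall back
	 * to the integer method).  Storing 0xfe releases the lock.
	 */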
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx	/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstpl	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
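	/*
	 * Roughly: the code below is generic_bzero with the arguments already
	 * in registers, i.e. a word-wise `rep stosl' followed by a byte-wise
	 * `rep stosb' for the remaining len & 3 bytes.
	 */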
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
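	/*
	 * As in generic_bcopy above, the unsigned comparison implements the
	 * overlap test: copy backwards only when (unsigned)(dst - src) < len,
	 * i.e. the regions overlap and the source lies below the destination.
	 */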

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3.  For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
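/*
 * Roughly, each routine below follows the pattern
 *
 *	curpcb->pcb_onfault = fault_handler;
 *	... touch user memory ...
 *	curpcb->pcb_onfault = NULL;
 *	return (0);
 *
 * where the fault handler unwinds the stack, clears pcb_onfault and returns
 * EFAULT (or -1 for the fu*()/su*() routines).
 */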

/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */
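	/*
	 * Roughly: the two checks below fail with EFAULT if (to + len) wraps
	 * past 0 or extends beyond VM_MAXUSER_ADDRESS, so a copy can never
	 * run off the end of user space into kernel addresses.
	 */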

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */

#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
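/*
 * Rough outline, in C-like terms: short or misaligned requests fall through
 * to fastmove_tail (plain rep movsl/movsb).  Otherwise, if npxproc is set,
 * the live FPU state is fnsave'd into the pcb and npxproc is cleared; the
 * pcb FPU save area is then stashed in a stack temporary, CR0_TS is cleared
 * and npxproc is set to curproc.  The copy proceeds in chunks of at most
 * 1792 bytes: a chunk of 256 bytes or more is first primed into the cache by
 * reading one longword per 32 bytes, then moved 64 bytes per iteration with
 * fildq/fistpq pairs.  Finally the pcb save area is restored, CR0_TS is set
 * again, npxproc is cleared, and any remaining bytes (< 64) are copied with
 * rep movsl/movsb.
 */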
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi			/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi			/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*    npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */
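/*
 * Roughly: fuword(addr) and friends return the fetched value or -1 on a
 * fault, and suword(addr, v) and friends return 0 on success or -1 on a
 * fault; the shared fusufault handler below just clears pcb_onfault and
 * returns -1.
 */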

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
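/*
 * Roughly: on a genuine 386 the kernel can write anywhere regardless of PTE
 * permissions, so the su*() routines first look up the user PTE through the
 * recursive _PTmap mapping and, if the page is not valid, writable and user
 * accessible, call trapwrite() to simulate the write fault before storing.
 */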

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations.  If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
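/*
 * Roughly: maxlen is first clipped to VM_MAXUSER_ADDRESS - from, then bytes
 * are copied one at a time until a NUL is copied or the count runs out; if
 * the count ran out because the string reaches the end of user space the
 * error is EFAULT rather than ENAMETOOLONG, and any protection fault lands
 * in cpystrflt.
 */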
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/* ssdtosd(*ssdp,*sdp) */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
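/*
 * ssdtosd() above packs a machine independent software segment descriptor
 * (struct soft_segment_descriptor) into the shuffled bit layout of a
 * hardware segment descriptor; the rotates and byte moves rearrange the
 * base and limit fields into their split positions.
 */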

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3			/* avoid macro for `ret' */