/* support.s -- revision 19678 */
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: support.s,v 1.43 1996/11/11 20:38:53 bde Exp $
 */

#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10		/* kernel data selector */
#define IDXSHIFT	10

/*
 * Patchable function-pointer vectors.  Each defaults to the generic
 * routine and may be repointed to a CPU-specific implementation
 * (e.g. the i586 FPU-assisted versions below).
 */
	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
/*
 * Lock byte for the kernel FPU-usage paths below; claimed with
 * `sarb $1' (carry set => already held) and released by rewriting
 * 0xfe, which means "free".
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax
	shrl	$2,%ecx			/* store len/4 longwords... */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* ...then the remaining bytes */
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* %ecx = 0..3 remaining bytes */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#ifdef I586_CPU
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx		/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero	/* already held; use integer method */
	smsw	%ax			/* preserve CR0_TS state in %ax */
	clts
	subl	$108,%esp		/* 108 = FPU save-area size */
	fnsave	0(%esp)			/* preserve the owner's FPU state */
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)		/* zero the (possibly partial) tail */
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* restore the owner's FPU state */
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop our fldz constant */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* len again (8(%esp) before the push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pat */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* cnt */
	cld
	rep
	stosw
	popl	%edi
	ret

/* bcopyb(src, dst, cnt): byte-wise copy, safe for overlapping buffers */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards.
					 */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std				/* backwards: set direction flag */
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* restore the expected direction */
	ret

/* bcopy(src, dst, cnt): dispatch through the patchable vector */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

/* ovbcopy(src, dst, cnt): overlap-safe bcopy, also vectored */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#ifdef I586_CPU
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst?
					 */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx			/* preserve CR0_TS state in %dx */
	clts
	subl	$108,%esp		/* 108 = FPU save-area size */
	fnsave	0(%esp)			/* preserve the owner's FPU state */
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)		/* 0(%esp) = bytes left for later passes */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax		/* touch one word per 32-byte line */
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* bytes deferred at 2: above */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx			/* copy by 32-bit words, then bytes */
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax		/* return value: dst */
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3.  For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */

/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:	/* check PTE for each page */
	movb	_PTmap(%edx),%al
	andb	$0x07,%al		/* Pages must be VALID + USERACC + WRITABLE */
	cmpb	$0x07,%al
	je	2f

	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx			/* copy by words, then bytes */
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyin
#endif /* I586_CPU */

#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	movl	4(%esp),%ecx
	cmpl	$63,%ecx		/* small moves: plain integer copy */
	jbe	fastmove_tail

	testl	$7,%esi			/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi			/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE,%esp	/* frame-local FPU state copy */

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*    npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	pushl	%edi
	pushl	%esi
	pushl	%ecx
	leal	-PCB_SAVEFPU_SIZE(%ebp),%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	popl	%ecx
	popl	%esi
	popl	%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
4:
	pushl	%ecx			/* 0(%esp) = bytes left for later passes */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax		/* touch one word per 32-byte line */
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	popl	%eax			/* bytes deferred at 2: above */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	pushl	%edi
	pushl	%esi
	pushl	%ecx
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	leal	-PCB_SAVEFPU_SIZE(%ebp),%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	popl	%ecx
	popl	%esi
	popl	%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc
	movl	%ebp,%esp
	popl	%ebp
	/* fall through to copy the <64-byte remainder with integer moves */

	ALIGN_TEXT
fastmove_tail:
	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes
 */
	rep
	movsb

	ret
#endif /* I586_CPU */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx		/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax			/* return -1 */
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	movl	_curpcb,%ecx		/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	movl	_curpcb,%ecx		/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	movl	_curpcb,%ecx		/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations.  If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm fault recovery */

	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 *	like copyinstr, but kernel-to-kernel: no address checks and no
 *	onfault handler are set up here.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/* bcmp(b1, b2, len): %eax = 0 if equal, 1 otherwise */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret
/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/*
 * ssdtosd(*ssdp,*sdp)
 *	pack the three longwords at *ssdp into the two-longword
 *	hardware descriptor layout at *sdp.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)		/* save ebx */
	movl	%esp,4(%eax)		/* save esp */
	movl	%ebp,8(%eax)		/* save ebp */
	movl	%esi,12(%eax)		/* save esi */
	movl	%edi,16(%eax)		/* save edi */
	movl	(%esp),%edx		/* get rta */
	movl	%edx,20(%eax)		/* save eip */
	xorl	%eax,%eax		/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx		/* restore ebx */
	movl	4(%eax),%esp		/* restore esp */
	movl	8(%eax),%ebp		/* restore ebp */
	movl	12(%eax),%esi		/* restore esi */
	movl	16(%eax),%edi		/* restore edi */
	movl	20(%eax),%edx		/* get rta */
	movl	%edx,(%esp)		/* put in return frame */
	xorl	%eax,%eax		/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)		/* mark the block set as initialized */
	.byte	0xc3			/* avoid macro for `ret' */