/* support.s revision 19653 */
1/*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 * $Id: support.s,v 1.42 1996/11/08 02:38:44 asami Exp $
 */

/*
 * i386 kernel support routines: bzero/bcopy/memcpy families and their
 * CPU-specific variants.  AT&T syntax, 32-bit kernel, cdecl arguments on
 * the stack.  The bcopy/bzero/copyin/copyout entry points dispatch through
 * the *_vector pointers below so the best routine for the detected CPU can
 * be installed at boot time.
 */

#include "opt_cpu.h"
#include "opt_temporary.h"	/* for I586_*_B* */

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10	/* kernel data selector */
#define IDXSHIFT	10

	.data
	/* Indirect-dispatch vectors; default to the generic routines. */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
	/*
	 * Byte lock for kernel use of the FPU.  0xfe = unlocked: `sarb $1'
	 * shifts out a 0 (CF clear, lock acquired, value becomes 0xff);
	 * while locked, `sarb $1' on 0xff shifts out a 1 (CF set -> busy).
	 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill value = 0 */
	shrl	$2,%ecx			/* store by 32-bit words first */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* then the 0-3 leftover bytes */
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* dispatch on remaining 0-3 bytes */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#ifdef I586_CPU
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* CF set -> lock already held */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0_TS (in %ax) */
	clts
	subl	$108,%esp		/* room for an 87-style FPU save area */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit			/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx	/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* restore the application's state */
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the zero we loaded */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* len (8(%esp) before the push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pat */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* cnt (16-bit words) */
	cld
	rep
	stosw
	popl	%edi
	ret

/* bcopyb(src, dst, cnt) -- byte-wise overlap-safe copy */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* leave DF clear per convention */
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector		/* CPU-specific routine chosen at boot */

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#ifdef I586_CPU
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx			/* save CR0_TS (in %dx here) */
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit			/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx			/* outer count lives at 0(%esp) */
#define DCACHE_SIZE 8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* chunk bounded by half L1 */
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* Touch one word per 32-byte line to pull the source into cache. */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	/* Move 64 bytes per iteration through the 8 FPU registers. */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* outer count remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		/* dst */
	movl	16(%esp),%esi		/* src */
	movl	20(%esp),%ecx		/* len */
	movl	%edi,%eax		/* return dst, per memcpy(3) */
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3.  For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm the fault handler */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* from (kernel) */
	movl	20(%esp),%edi		/* to (user) */
	movl	24(%esp),%ebx		/* len */
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:	/* check PTE for each page */
	movb	_PTmap(%edx),%al
	andb	$0x07,%al	/* Pages must be VALID + USERACC + WRITABLE */
	cmpb	$0x07,%al
	je	2f

	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm the fault handler */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault		/* wrapped */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al			/* stash low bits of len */
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyin
#endif /* I586_CPU */

#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	movl	4(%esp),%ecx		/* len */
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE,%esp	/* local copy of the FPU save area */

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*    npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	pushl	%edi
	pushl	%esi
	pushl	%ecx
	leal	-PCB_SAVEFPU_SIZE(%ebp),%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	popl	%ecx
	popl	%esi
	popl	%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
4:
	pushl	%ecx
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx		/* bound chunk size */
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* Touch one word per 32-byte line to pull the source into cache. */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
fastmove_loop:
	/* Move 64 bytes per iteration through the 8 FPU registers. */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	pushl	%edi
	pushl	%esi
	pushl	%ecx
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	leal	-PCB_SAVEFPU_SIZE(%ebp),%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	popl	%ecx
	popl	%esi
	popl	%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc
	movl	%ebp,%esp
	popl	%ebp
	/* fall through to copy the <64-byte tail */

	ALIGN_TEXT
fastmove_tail:
	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	ret
#endif /* I586_CPU */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx		/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax			/* return -1 */
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	movl	_curpcb,%ecx		/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	movl	_curpcb,%ecx		/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	movl	_curpcb,%ecx		/* restore trashed register */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* length = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/* bcmp(a, b, len) -- returns 0 if equal, nonzero otherwise */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret
/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/* ssdtosd(*ssdp,*sdp) -- repack a soft segment descriptor into h/w format */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjump, longjump */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)		/* save ebx */
	movl	%esp,4(%eax)		/* save esp */
	movl	%ebp,8(%eax)		/* save ebp */
	movl	%esi,12(%eax)		/* save esi */
	movl	%edi,16(%eax)		/* save edi */
	movl	(%esp),%edx		/* get rta */
	movl	%edx,20(%eax)		/* save eip */
	xorl	%eax,%eax		/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx		/* restore ebx */
	movl	4(%eax),%esp		/* restore esp */
	movl	8(%eax),%ebp		/* restore ebp */
	movl	12(%eax),%esi		/* restore esi */
	movl	16(%eax),%edi		/* restore edi */
	movl	20(%eax),%edx		/* get rta */
	movl	%edx,(%esp)		/* put in return frame */
	xorl	%eax,%eax		/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3			/* avoid macro for `ret' */