/* support.s, revision 18835 */
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
32 * 33 * $Id: support.s,v 1.39 1996/09/20 16:52:09 bde Exp $ 34 */ 35 36#include "opt_temporary.h" /* for I586_*_B* */ 37 38#include <machine/asmacros.h> 39#include <machine/cputypes.h> 40#include <machine/specialreg.h> 41 42#include "assym.s" 43 44#define KDSEL 0x10 /* kernel data selector */ 45#define IDXSHIFT 10 46 47 .data 48 .globl _bcopy_vector 49_bcopy_vector: 50 .long _generic_bcopy 51 .globl _bzero 52_bzero: 53 .long _generic_bzero 54 .globl _ovbcopy_vector 55_ovbcopy_vector: 56 .long _generic_bcopy 57kernel_fpu_lock: 58 .byte 0xfe 59 .space 3 60 61 .text 62 63/* 64 * bcopy family 65 * void bzero(void *buf, u_int len) 66 */ 67 68ENTRY(generic_bzero) 69 pushl %edi 70 movl 8(%esp),%edi 71 movl 12(%esp),%ecx 72 xorl %eax,%eax 73 shrl $2,%ecx 74 cld 75 rep 76 stosl 77 movl 12(%esp),%ecx 78 andl $3,%ecx 79 rep 80 stosb 81 popl %edi 82 ret 83 84#if defined(I486_CPU) 85ENTRY(i486_bzero) 86 movl 4(%esp),%edx 87 movl 8(%esp),%ecx 88 xorl %eax,%eax 89/* 90 * do 64 byte chunks first 91 * 92 * XXX this is probably over-unrolled at least for DX2's 93 */ 942: 95 cmpl $64,%ecx 96 jb 3f 97 movl %eax,(%edx) 98 movl %eax,4(%edx) 99 movl %eax,8(%edx) 100 movl %eax,12(%edx) 101 movl %eax,16(%edx) 102 movl %eax,20(%edx) 103 movl %eax,24(%edx) 104 movl %eax,28(%edx) 105 movl %eax,32(%edx) 106 movl %eax,36(%edx) 107 movl %eax,40(%edx) 108 movl %eax,44(%edx) 109 movl %eax,48(%edx) 110 movl %eax,52(%edx) 111 movl %eax,56(%edx) 112 movl %eax,60(%edx) 113 addl $64,%edx 114 subl $64,%ecx 115 jnz 2b 116 ret 117 118/* 119 * do 16 byte chunks 120 */ 121 SUPERALIGN_TEXT 1223: 123 cmpl $16,%ecx 124 jb 4f 125 movl %eax,(%edx) 126 movl %eax,4(%edx) 127 movl %eax,8(%edx) 128 movl %eax,12(%edx) 129 addl $16,%edx 130 subl $16,%ecx 131 jnz 3b 132 ret 133 134/* 135 * do 4 byte chunks 136 */ 137 SUPERALIGN_TEXT 1384: 139 cmpl $4,%ecx 140 jb 5f 141 movl %eax,(%edx) 142 addl $4,%edx 143 subl $4,%ecx 144 jnz 4b 145 ret 146 147/* 148 * do 1 byte chunks 149 * a jump table seems to be faster than a 
loop or more range reductions 150 * 151 * XXX need a const section for non-text 152 */ 153 .data 154jtab: 155 .long do0 156 .long do1 157 .long do2 158 .long do3 159 160 .text 161 SUPERALIGN_TEXT 1625: 163 jmp jtab(,%ecx,4) 164 165 SUPERALIGN_TEXT 166do3: 167 movw %ax,(%edx) 168 movb %al,2(%edx) 169 ret 170 171 SUPERALIGN_TEXT 172do2: 173 movw %ax,(%edx) 174 ret 175 176 SUPERALIGN_TEXT 177do1: 178 movb %al,(%edx) 179 ret 180 181 SUPERALIGN_TEXT 182do0: 183 ret 184#endif 185 186#if defined(I586_CPU) || defined(I686_CPU) 187ENTRY(i586_bzero) 188 movl 4(%esp),%edx 189 movl 8(%esp),%ecx 190 191 /* 192 * The FPU register method is twice as fast as the integer register 193 * method unless the target is in the L1 cache and we pre-allocate a 194 * cache line for it (then the integer register method is 4-5 times 195 * faster). However, we never pre-allocate cache lines, since that 196 * would make the integer method 25% or more slower for the common 197 * case when the target isn't in either the L1 cache or the L2 cache. 198 * Thus we normally use the FPU register method unless the overhead 199 * would be too large. 200 */ 201 cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ 202 jb intreg_i586_bzero 203 204 /* 205 * The FPU registers may belong to an application or to fastmove() 206 * or to another invocation of bcopy() or ourself in a higher level 207 * interrupt or trap handler. Preserving the registers is 208 * complicated since we avoid it if possible at all levels. We 209 * want to localize the complications even when that increases them. 210 * Here the extra work involves preserving CR0_TS in TS. 211 * `npxproc != NULL' is supposed to be the condition that all the 212 * FPU resources belong to an application, but npxproc and CR0_TS 213 * aren't set atomically enough for this condition to work in 214 * interrupt handlers. 
215 * 216 * Case 1: FPU registers belong to the application: we must preserve 217 * the registers if we use them, so we only use the FPU register 218 * method if the target size is large enough to amortize the extra 219 * overhead for preserving them. CR0_TS must be preserved although 220 * it is very likely to end up as set. 221 * 222 * Case 2: FPU registers belong to fastmove(): fastmove() currently 223 * makes the registers look like they belong to an application so 224 * that cpu_switch() and savectx() don't have to know about it, so 225 * this case reduces to case 1. 226 * 227 * Case 3: FPU registers belong to the kernel: don't use the FPU 228 * register method. This case is unlikely, and supporting it would 229 * be more complicated and might take too much stack. 230 * 231 * Case 4: FPU registers don't belong to anyone: the FPU registers 232 * don't need to be preserved, so we always use the FPU register 233 * method. CR0_TS must be preserved although it is very likely to 234 * always end up as clear. 235 */ 236 cmpl $0,_npxproc 237 je i586_bz1 238 cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ 239 jb intreg_i586_bzero 240 sarb $1,kernel_fpu_lock 241 jc intreg_i586_bzero 242 smsw %ax 243 clts 244 subl $108,%esp 245 fnsave 0(%esp) 246 jmp i586_bz2 247 248i586_bz1: 249 sarb $1,kernel_fpu_lock 250 jc intreg_i586_bzero 251 smsw %ax 252 clts 253 fninit /* XXX should avoid needing this */ 254i586_bz2: 255 fldz 256 257 /* 258 * Align to an 8 byte boundary (misalignment in the main loop would 259 * cost a factor of >= 2). Avoid jumps (at little cost if it is 260 * already aligned) by always zeroing 8 bytes and using the part up 261 * to the _next_ alignment position. 262 */ 263 fstl 0(%edx) 264 addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ 265 addl $8,%edx 266 andl $~7,%edx 267 subl %edx,%ecx 268 269 /* 270 * Similarly align `len' to a multiple of 8. 
271 */ 272 fstl -8(%edx,%ecx) 273 decl %ecx 274 andl $~7,%ecx 275 276 /* 277 * This wouldn't be any faster if it were unrolled, since the loop 278 * control instructions are much faster than the fstl and/or done 279 * in parallel with it so their overhead is insignificant. 280 */ 281fpureg_i586_bzero_loop: 282 fstl 0(%edx) 283 addl $8,%edx 284 subl $8,%ecx 285 cmpl $8,%ecx 286 jae fpureg_i586_bzero_loop 287 288 cmpl $0,_npxproc 289 je i586_bz3 290 frstor 0(%esp) 291 addl $108,%esp 292 lmsw %ax 293 movb $0xfe,kernel_fpu_lock 294 ret 295 296i586_bz3: 297 fstpl %st(0) 298 lmsw %ax 299 movb $0xfe,kernel_fpu_lock 300 ret 301 302intreg_i586_bzero: 303 /* 304 * `rep stos' seems to be the best method in practice for small 305 * counts. Fancy methods usually take too long to start up due 306 * to cache and BTB misses. 307 */ 308 pushl %edi 309 movl %edx,%edi 310 xorl %eax,%eax 311 shrl $2,%ecx 312 cld 313 rep 314 stosl 315 movl 12(%esp),%ecx 316 andl $3,%ecx 317 jne 1f 318 popl %edi 319 ret 320 3211: 322 rep 323 stosb 324 popl %edi 325 ret 326#endif /* I586_CPU || I686_CPU */ 327 328/* fillw(pat, base, cnt) */ 329ENTRY(fillw) 330 pushl %edi 331 movl 8(%esp),%eax 332 movl 12(%esp),%edi 333 movl 16(%esp),%ecx 334 cld 335 rep 336 stosw 337 popl %edi 338 ret 339 340ENTRY(bcopyb) 341bcopyb: 342 pushl %esi 343 pushl %edi 344 movl 12(%esp),%esi 345 movl 16(%esp),%edi 346 movl 20(%esp),%ecx 347 movl %edi,%eax 348 subl %esi,%eax 349 cmpl %ecx,%eax /* overlapping && src < dst? */ 350 jb 1f 351 cld /* nope, copy forwards */ 352 rep 353 movsb 354 popl %edi 355 popl %esi 356 ret 357 358 ALIGN_TEXT 3591: 360 addl %ecx,%edi /* copy backwards. 
*/ 361 addl %ecx,%esi 362 decl %edi 363 decl %esi 364 std 365 rep 366 movsb 367 popl %edi 368 popl %esi 369 cld 370 ret 371 372ENTRY(bcopy) 373 MEXITCOUNT 374 jmp *_bcopy_vector 375 376ENTRY(ovbcopy) 377 MEXITCOUNT 378 jmp *_ovbcopy_vector 379 380/* 381 * generic_bcopy(src, dst, cnt) 382 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 383 */ 384ENTRY(generic_bcopy) 385 pushl %esi 386 pushl %edi 387 movl 12(%esp),%esi 388 movl 16(%esp),%edi 389 movl 20(%esp),%ecx 390 391 movl %edi,%eax 392 subl %esi,%eax 393 cmpl %ecx,%eax /* overlapping && src < dst? */ 394 jb 1f 395 396 shrl $2,%ecx /* copy by 32-bit words */ 397 cld /* nope, copy forwards */ 398 rep 399 movsl 400 movl 20(%esp),%ecx 401 andl $3,%ecx /* any bytes left? */ 402 rep 403 movsb 404 popl %edi 405 popl %esi 406 ret 407 408 ALIGN_TEXT 4091: 410 addl %ecx,%edi /* copy backwards */ 411 addl %ecx,%esi 412 decl %edi 413 decl %esi 414 andl $3,%ecx /* any fractional bytes? */ 415 std 416 rep 417 movsb 418 movl 20(%esp),%ecx /* copy remainder by 32-bit words */ 419 shrl $2,%ecx 420 subl $3,%esi 421 subl $3,%edi 422 rep 423 movsl 424 popl %edi 425 popl %esi 426 cld 427 ret 428 429ENTRY(i586_bcopy) 430 pushl %esi 431 pushl %edi 432 movl 12(%esp),%esi 433 movl 16(%esp),%edi 434 movl 20(%esp),%ecx 435 436 movl %edi,%eax 437 subl %esi,%eax 438 cmpl %ecx,%eax /* overlapping && src < dst? 
*/ 439 jb 1f 440 441 cmpl $1024,%ecx 442 jb small_i586_bcopy 443 444 sarb $1,kernel_fpu_lock 445 jc small_i586_bcopy 446 cmpl $0,_npxproc 447 je i586_bc1 448 smsw %dx 449 clts 450 subl $108,%esp 451 fnsave 0(%esp) 452 jmp 4f 453 454i586_bc1: 455 smsw %dx 456 clts 457 fninit /* XXX should avoid needing this */ 458 459 ALIGN_TEXT 4604: 461 pushl %ecx 462#define DCACHE_SIZE 8192 463 cmpl $(DCACHE_SIZE-512)/2,%ecx 464 jbe 2f 465 movl $(DCACHE_SIZE-512)/2,%ecx 4662: 467 subl %ecx,0(%esp) 468 cmpl $256,%ecx 469 jb 5f /* XXX should prefetch if %ecx >= 32 */ 470 pushl %esi 471 pushl %ecx 472 ALIGN_TEXT 4733: 474 movl 0(%esi),%eax 475 movl 32(%esi),%eax 476 movl 64(%esi),%eax 477 movl 96(%esi),%eax 478 movl 128(%esi),%eax 479 movl 160(%esi),%eax 480 movl 192(%esi),%eax 481 movl 224(%esi),%eax 482 addl $256,%esi 483 subl $256,%ecx 484 cmpl $256,%ecx 485 jae 3b 486 popl %ecx 487 popl %esi 4885: 489 ALIGN_TEXT 490large_i586_bcopy_loop: 491 fildq 0(%esi) 492 fildq 8(%esi) 493 fildq 16(%esi) 494 fildq 24(%esi) 495 fildq 32(%esi) 496 fildq 40(%esi) 497 fildq 48(%esi) 498 fildq 56(%esi) 499 fistpq 56(%edi) 500 fistpq 48(%edi) 501 fistpq 40(%edi) 502 fistpq 32(%edi) 503 fistpq 24(%edi) 504 fistpq 16(%edi) 505 fistpq 8(%edi) 506 fistpq 0(%edi) 507 addl $64,%esi 508 addl $64,%edi 509 subl $64,%ecx 510 cmpl $64,%ecx 511 jae large_i586_bcopy_loop 512 popl %eax 513 addl %eax,%ecx 514 cmpl $64,%ecx 515 jae 4b 516 517 cmpl $0,_npxproc 518 je i586_bc2 519 frstor 0(%esp) 520 addl $108,%esp 521i586_bc2: 522 lmsw %dx 523 movb $0xfe,kernel_fpu_lock 524 525/* 526 * This is a duplicate of the main part of generic_bcopy. See the comments 527 * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and 528 * would mess up high resolution profiling. 
529 */ 530 ALIGN_TEXT 531small_i586_bcopy: 532 shrl $2,%ecx 533 cld 534 rep 535 movsl 536 movl 20(%esp),%ecx 537 andl $3,%ecx 538 rep 539 movsb 540 popl %edi 541 popl %esi 542 ret 543 544 ALIGN_TEXT 5451: 546 addl %ecx,%edi 547 addl %ecx,%esi 548 decl %edi 549 decl %esi 550 andl $3,%ecx 551 std 552 rep 553 movsb 554 movl 20(%esp),%ecx 555 shrl $2,%ecx 556 subl $3,%esi 557 subl $3,%edi 558 rep 559 movsl 560 popl %edi 561 popl %esi 562 cld 563 ret 564 565/* 566 * Note: memcpy does not support overlapping copies 567 */ 568ENTRY(memcpy) 569 pushl %edi 570 pushl %esi 571 movl 12(%esp),%edi 572 movl 16(%esp),%esi 573 movl 20(%esp),%ecx 574 movl %edi,%eax 575 shrl $2,%ecx /* copy by 32-bit words */ 576 cld /* nope, copy forwards */ 577 rep 578 movsl 579 movl 20(%esp),%ecx 580 andl $3,%ecx /* any bytes left? */ 581 rep 582 movsb 583 popl %esi 584 popl %edi 585 ret 586 587 588/*****************************************************************************/ 589/* copyout and fubyte family */ 590/*****************************************************************************/ 591/* 592 * Access user memory from inside the kernel. These routines and possibly 593 * the math- and DOS emulators should be the only places that do this. 594 * 595 * We have to access the memory with user's permissions, so use a segment 596 * selector with RPL 3. For writes to user space we have to additionally 597 * check the PTE for write permission, because the 386 does not check 598 * write permissions when we are executing with EPL 0. The 486 does check 599 * this if the WP bit is set in CR0, so we can use a simpler version here. 600 * 601 * These routines set curpcb->onfault for the time they execute. When a 602 * protection violation occurs inside the functions, the trap handler 603 * returns to *curpcb->onfault instead of the function. 
604 */ 605 606 607ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ 608 movl _curpcb,%eax 609 movl $copyout_fault,PCB_ONFAULT(%eax) 610 pushl %esi 611 pushl %edi 612 pushl %ebx 613 movl 16(%esp),%esi 614 movl 20(%esp),%edi 615 movl 24(%esp),%ebx 616 testl %ebx,%ebx /* anything to do? */ 617 jz done_copyout 618 619 /* 620 * Check explicitly for non-user addresses. If 486 write protection 621 * is being used, this check is essential because we are in kernel 622 * mode so the h/w does not provide any protection against writing 623 * kernel addresses. 624 */ 625 626 /* 627 * First, prevent address wrapping. 628 */ 629 movl %edi,%eax 630 addl %ebx,%eax 631 jc copyout_fault 632/* 633 * XXX STOP USING VM_MAXUSER_ADDRESS. 634 * It is an end address, not a max, so every time it is used correctly it 635 * looks like there is an off by one error, and of course it caused an off 636 * by one error in several places. 637 */ 638 cmpl $VM_MAXUSER_ADDRESS,%eax 639 ja copyout_fault 640 641#if defined(I386_CPU) 642 643#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 644 cmpl $CPUCLASS_386,_cpu_class 645 jne 3f 646#endif 647/* 648 * We have to check each PTE for user write permission. 649 * The checking may cause a page fault, so it is important to set 650 * up everything for return via copyout_fault before here. 
651 */ 652 /* compute number of pages */ 653 movl %edi,%ecx 654 andl $PAGE_MASK,%ecx 655 addl %ebx,%ecx 656 decl %ecx 657 shrl $IDXSHIFT+2,%ecx 658 incl %ecx 659 660 /* compute PTE offset for start address */ 661 movl %edi,%edx 662 shrl $IDXSHIFT,%edx 663 andb $0xfc,%dl 664 6651: /* check PTE for each page */ 666 movb _PTmap(%edx),%al 667 andb $0x07,%al /* Pages must be VALID + USERACC + WRITABLE */ 668 cmpb $0x07,%al 669 je 2f 670 671 /* simulate a trap */ 672 pushl %edx 673 pushl %ecx 674 shll $IDXSHIFT,%edx 675 pushl %edx 676 call _trapwrite /* trapwrite(addr) */ 677 popl %edx 678 popl %ecx 679 popl %edx 680 681 testl %eax,%eax /* if not ok, return EFAULT */ 682 jnz copyout_fault 683 6842: 685 addl $4,%edx 686 decl %ecx 687 jnz 1b /* check next page */ 688#endif /* I386_CPU */ 689 690 /* bcopy(%esi, %edi, %ebx) */ 6913: 692 movl %ebx,%ecx 693#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 694 cmpl $1024,%ecx 695 jb slow_copyout 696 697#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) 698 cmpl $CPUCLASS_586,_cpu_class 699 jne slow_copyout 700#endif /* I386_CPU || I486_CPU || I686_CPU */ 701 702 pushl %ecx 703 call _fastmove 704 addl $4,%esp 705 jmp done_copyout 706 707 ALIGN_TEXT 708slow_copyout: 709#endif /* I586_CPU && I586_FAST_BCOPY */ 710 shrl $2,%ecx 711 cld 712 rep 713 movsl 714 movb %bl,%cl 715 andb $3,%cl 716 rep 717 movsb 718 719done_copyout: 720 popl %ebx 721 popl %edi 722 popl %esi 723 xorl %eax,%eax 724 movl _curpcb,%edx 725 movl %eax,PCB_ONFAULT(%edx) 726 ret 727 728 ALIGN_TEXT 729copyout_fault: 730 popl %ebx 731 popl %edi 732 popl %esi 733 movl _curpcb,%edx 734 movl $0,PCB_ONFAULT(%edx) 735 movl $EFAULT,%eax 736 ret 737 738/* copyin(from_user, to_kernel, len) */ 739ENTRY(copyin) 740 movl _curpcb,%eax 741 movl $copyin_fault,PCB_ONFAULT(%eax) 742 pushl %esi 743 pushl %edi 744 movl 12(%esp),%esi /* caddr_t from */ 745 movl 16(%esp),%edi /* caddr_t to */ 746 movl 20(%esp),%ecx /* size_t len */ 747 748 /* 749 * make sure address is valid 
750 */ 751 movl %esi,%edx 752 addl %ecx,%edx 753 jc copyin_fault 754 cmpl $VM_MAXUSER_ADDRESS,%edx 755 ja copyin_fault 756 757#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 758 cmpl $1024,%ecx 759 jb slow_copyin 760 761#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) 762 cmpl $CPUCLASS_586,_cpu_class 763 jne slow_copyin 764#endif /* I386_CPU || I486_CPU || I686_CPU */ 765 766 pushl %ecx 767 call _fastmove 768 addl $4,%esp 769 jmp done_copyin 770 771 ALIGN_TEXT 772slow_copyin: 773#endif /* I586_CPU && I586_FAST_BCOPY */ 774 movb %cl,%al 775 shrl $2,%ecx /* copy longword-wise */ 776 cld 777 rep 778 movsl 779 movb %al,%cl 780 andb $3,%cl /* copy remaining bytes */ 781 rep 782 movsb 783 784#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 785 ALIGN_TEXT 786done_copyin: 787#endif /* I586_CPU && I586_FAST_BCOPY */ 788 popl %edi 789 popl %esi 790 xorl %eax,%eax 791 movl _curpcb,%edx 792 movl %eax,PCB_ONFAULT(%edx) 793 ret 794 795 ALIGN_TEXT 796copyin_fault: 797 popl %edi 798 popl %esi 799 movl _curpcb,%edx 800 movl $0,PCB_ONFAULT(%edx) 801 movl $EFAULT,%eax 802 ret 803 804#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 805/* fastmove(src, dst, len) 806 src in %esi 807 dst in %edi 808 len in %ecx XXX changed to on stack for profiling 809 uses %eax and %edx for tmp. storage 810 */ 811/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ 812ENTRY(fastmove) 813 movl 4(%esp),%ecx 814 cmpl $63,%ecx 815 jbe fastmove_tail 816 817 testl $7,%esi /* check if src addr is multiple of 8 */ 818 jnz fastmove_tail 819 820 testl $7,%edi /* check if dst addr is multiple of 8 */ 821 jnz fastmove_tail 822 823 pushl %ebp 824 movl %esp,%ebp 825 subl $PCB_SAVEFPU_SIZE,%esp 826 827/* if (npxproc != NULL) { */ 828 cmpl $0,_npxproc 829 je 6f 830/* fnsave(&curpcb->pcb_savefpu); */ 831 movl _curpcb,%eax 832 fnsave PCB_SAVEFPU(%eax) 833/* npxproc = NULL; */ 834 movl $0,_npxproc 835/* } */ 8366: 837/* now we own the FPU. 
*/ 838 839/* 840 * The process' FP state is saved in the pcb, but if we get 841 * switched, the cpu_switch() will store our FP state in the 842 * pcb. It should be possible to avoid all the copying for 843 * this, e.g., by setting a flag to tell cpu_switch() to 844 * save the state somewhere else. 845 */ 846/* tmp = curpcb->pcb_savefpu; */ 847 pushl %edi 848 pushl %esi 849 pushl %ecx 850 leal -PCB_SAVEFPU_SIZE(%ebp),%edi 851 movl _curpcb,%esi 852 addl $PCB_SAVEFPU,%esi 853 cld 854 movl $PCB_SAVEFPU_SIZE>>2,%ecx 855 rep 856 movsl 857 popl %ecx 858 popl %esi 859 popl %edi 860/* stop_emulating(); */ 861 clts 862/* npxproc = curproc; */ 863 movl _curproc,%eax 864 movl %eax,_npxproc 8654: 866 pushl %ecx 867 cmpl $1792,%ecx 868 jbe 2f 869 movl $1792,%ecx 8702: 871 subl %ecx,0(%esp) 872 cmpl $256,%ecx 873 jb 5f 874 pushl %esi 875 pushl %ecx 876 ALIGN_TEXT 8773: 878 movl 0(%esi),%eax 879 movl 32(%esi),%eax 880 movl 64(%esi),%eax 881 movl 96(%esi),%eax 882 movl 128(%esi),%eax 883 movl 160(%esi),%eax 884 movl 192(%esi),%eax 885 movl 224(%esi),%eax 886 addl $256,%esi 887 subl $256,%ecx 888 cmpl $256,%ecx 889 jae 3b 890 popl %ecx 891 popl %esi 8925: 893 ALIGN_TEXT 894fastmove_loop: 895 fildq 0(%esi) 896 fildq 8(%esi) 897 fildq 16(%esi) 898 fildq 24(%esi) 899 fildq 32(%esi) 900 fildq 40(%esi) 901 fildq 48(%esi) 902 fildq 56(%esi) 903 fistpq 56(%edi) 904 fistpq 48(%edi) 905 fistpq 40(%edi) 906 fistpq 32(%edi) 907 fistpq 24(%edi) 908 fistpq 16(%edi) 909 fistpq 8(%edi) 910 fistpq 0(%edi) 911 addl $-64,%ecx 912 addl $64,%esi 913 addl $64,%edi 914 cmpl $63,%ecx 915 ja fastmove_loop 916 popl %eax 917 addl %eax,%ecx 918 cmpl $64,%ecx 919 jae 4b 920 921/* curpcb->pcb_savefpu = tmp; */ 922 pushl %edi 923 pushl %esi 924 pushl %ecx 925 movl _curpcb,%edi 926 addl $PCB_SAVEFPU,%edi 927 leal -PCB_SAVEFPU_SIZE(%ebp),%esi 928 cld 929 movl $PCB_SAVEFPU_SIZE>>2,%ecx 930 rep 931 movsl 932 popl %ecx 933 popl %esi 934 popl %edi 935 936/* start_emulating(); */ 937 smsw %ax 938 orb $CR0_TS,%al 939 
lmsw %ax 940/* npxproc = NULL; */ 941 movl $0,_npxproc 942 movl %ebp,%esp 943 popl %ebp 944 945 ALIGN_TEXT 946fastmove_tail: 947 movb %cl,%al 948 shrl $2,%ecx /* copy longword-wise */ 949 cld 950 rep 951 movsl 952 movb %al,%cl 953 andb $3,%cl /* copy remaining bytes */ 954 rep 955 movsb 956 957 ret 958#endif /* I586_CPU && I586_FAST_BCOPY */ 959 960/* 961 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory 962 */ 963ENTRY(fuword) 964 movl _curpcb,%ecx 965 movl $fusufault,PCB_ONFAULT(%ecx) 966 movl 4(%esp),%edx /* from */ 967 968 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ 969 ja fusufault 970 971 movl (%edx),%eax 972 movl $0,PCB_ONFAULT(%ecx) 973 ret 974 975/* 976 * These two routines are called from the profiling code, potentially 977 * at interrupt time. If they fail, that's okay, good things will 978 * happen later. Fail all the time for now - until the trap code is 979 * able to deal with this. 980 */ 981ALTENTRY(suswintr) 982ENTRY(fuswintr) 983 movl $-1,%eax 984 ret 985 986ENTRY(fusword) 987 movl _curpcb,%ecx 988 movl $fusufault,PCB_ONFAULT(%ecx) 989 movl 4(%esp),%edx 990 991 cmpl $VM_MAXUSER_ADDRESS-2,%edx 992 ja fusufault 993 994 movzwl (%edx),%eax 995 movl $0,PCB_ONFAULT(%ecx) 996 ret 997 998ENTRY(fubyte) 999 movl _curpcb,%ecx 1000 movl $fusufault,PCB_ONFAULT(%ecx) 1001 movl 4(%esp),%edx 1002 1003 cmpl $VM_MAXUSER_ADDRESS-1,%edx 1004 ja fusufault 1005 1006 movzbl (%edx),%eax 1007 movl $0,PCB_ONFAULT(%ecx) 1008 ret 1009 1010 ALIGN_TEXT 1011fusufault: 1012 movl _curpcb,%ecx 1013 xorl %eax,%eax 1014 movl %eax,PCB_ONFAULT(%ecx) 1015 decl %eax 1016 ret 1017 1018/* 1019 * su{byte,sword,word}: write a byte (word, longword) to user memory 1020 */ 1021ENTRY(suword) 1022 movl _curpcb,%ecx 1023 movl $fusufault,PCB_ONFAULT(%ecx) 1024 movl 4(%esp),%edx 1025 1026#if defined(I386_CPU) 1027 1028#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 1029 cmpl $CPUCLASS_386,_cpu_class 1030 jne 2f /* we only have to set the right 
segment selector */ 1031#endif /* I486_CPU || I586_CPU || I686_CPU */ 1032 1033 /* XXX - page boundary crossing is still not handled */ 1034 movl %edx,%eax 1035 shrl $IDXSHIFT,%edx 1036 andb $0xfc,%dl 1037 movb _PTmap(%edx),%dl 1038 andb $0x7,%dl /* must be VALID + USERACC + WRITE */ 1039 cmpb $0x7,%dl 1040 je 1f 1041 1042 /* simulate a trap */ 1043 pushl %eax 1044 call _trapwrite 1045 popl %edx /* remove junk parameter from stack */ 1046 movl _curpcb,%ecx /* restore trashed register */ 1047 testl %eax,%eax 1048 jnz fusufault 10491: 1050 movl 4(%esp),%edx 1051#endif 1052 10532: 1054 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ 1055 ja fusufault 1056 1057 movl 8(%esp),%eax 1058 movl %eax,(%edx) 1059 xorl %eax,%eax 1060 movl %eax,PCB_ONFAULT(%ecx) 1061 ret 1062 1063ENTRY(susword) 1064 movl _curpcb,%ecx 1065 movl $fusufault,PCB_ONFAULT(%ecx) 1066 movl 4(%esp),%edx 1067 1068#if defined(I386_CPU) 1069 1070#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 1071 cmpl $CPUCLASS_386,_cpu_class 1072 jne 2f 1073#endif /* I486_CPU || I586_CPU || I686_CPU */ 1074 1075 /* XXX - page boundary crossing is still not handled */ 1076 movl %edx,%eax 1077 shrl $IDXSHIFT,%edx 1078 andb $0xfc,%dl 1079 movb _PTmap(%edx),%dl 1080 andb $0x7,%dl /* must be VALID + USERACC + WRITE */ 1081 cmpb $0x7,%dl 1082 je 1f 1083 1084 /* simulate a trap */ 1085 pushl %eax 1086 call _trapwrite 1087 popl %edx /* remove junk parameter from stack */ 1088 movl _curpcb,%ecx /* restore trashed register */ 1089 testl %eax,%eax 1090 jnz fusufault 10911: 1092 movl 4(%esp),%edx 1093#endif 1094 10952: 1096 cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ 1097 ja fusufault 1098 1099 movw 8(%esp),%ax 1100 movw %ax,(%edx) 1101 xorl %eax,%eax 1102 movl %eax,PCB_ONFAULT(%ecx) 1103 ret 1104 1105ALTENTRY(suibyte) 1106ENTRY(subyte) 1107 movl _curpcb,%ecx 1108 movl $fusufault,PCB_ONFAULT(%ecx) 1109 movl 4(%esp),%edx 1110 1111#if defined(I386_CPU) 1112 1113#if defined(I486_CPU) || 
defined(I586_CPU) || defined(I686_CPU) 1114 cmpl $CPUCLASS_386,_cpu_class 1115 jne 2f 1116#endif /* I486_CPU || I586_CPU || I686_CPU */ 1117 1118 movl %edx,%eax 1119 shrl $IDXSHIFT,%edx 1120 andb $0xfc,%dl 1121 movb _PTmap(%edx),%dl 1122 andb $0x7,%dl /* must be VALID + USERACC + WRITE */ 1123 cmpb $0x7,%dl 1124 je 1f 1125 1126 /* simulate a trap */ 1127 pushl %eax 1128 call _trapwrite 1129 popl %edx /* remove junk parameter from stack */ 1130 movl _curpcb,%ecx /* restore trashed register */ 1131 testl %eax,%eax 1132 jnz fusufault 11331: 1134 movl 4(%esp),%edx 1135#endif 1136 11372: 1138 cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ 1139 ja fusufault 1140 1141 movb 8(%esp),%al 1142 movb %al,(%edx) 1143 xorl %eax,%eax 1144 movl %eax,PCB_ONFAULT(%ecx) 1145 ret 1146 1147/* 1148 * copyinstr(from, to, maxlen, int *lencopied) 1149 * copy a string from from to to, stop when a 0 character is reached. 1150 * return ENAMETOOLONG if string is longer than maxlen, and 1151 * EFAULT on protection violations. If lencopied is non-zero, 1152 * return the actual length in *lencopied. 
1153 */ 1154ENTRY(copyinstr) 1155 pushl %esi 1156 pushl %edi 1157 movl _curpcb,%ecx 1158 movl $cpystrflt,PCB_ONFAULT(%ecx) 1159 1160 movl 12(%esp),%esi /* %esi = from */ 1161 movl 16(%esp),%edi /* %edi = to */ 1162 movl 20(%esp),%edx /* %edx = maxlen */ 1163 1164 movl $VM_MAXUSER_ADDRESS,%eax 1165 1166 /* make sure 'from' is within bounds */ 1167 subl %esi,%eax 1168 jbe cpystrflt 1169 1170 /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ 1171 cmpl %edx,%eax 1172 jae 1f 1173 movl %eax,%edx 1174 movl %eax,20(%esp) 11751: 1176 incl %edx 1177 cld 1178 11792: 1180 decl %edx 1181 jz 3f 1182 1183 lodsb 1184 stosb 1185 orb %al,%al 1186 jnz 2b 1187 1188 /* Success -- 0 byte reached */ 1189 decl %edx 1190 xorl %eax,%eax 1191 jmp cpystrflt_x 11923: 1193 /* edx is zero - return ENAMETOOLONG or EFAULT */ 1194 cmpl $VM_MAXUSER_ADDRESS,%esi 1195 jae cpystrflt 11964: 1197 movl $ENAMETOOLONG,%eax 1198 jmp cpystrflt_x 1199 1200cpystrflt: 1201 movl $EFAULT,%eax 1202 1203cpystrflt_x: 1204 /* set *lencopied and return %eax */ 1205 movl _curpcb,%ecx 1206 movl $0,PCB_ONFAULT(%ecx) 1207 movl 20(%esp),%ecx 1208 subl %edx,%ecx 1209 movl 24(%esp),%edx 1210 testl %edx,%edx 1211 jz 1f 1212 movl %ecx,(%edx) 12131: 1214 popl %edi 1215 popl %esi 1216 ret 1217 1218 1219/* 1220 * copystr(from, to, maxlen, int *lencopied) 1221 */ 1222ENTRY(copystr) 1223 pushl %esi 1224 pushl %edi 1225 1226 movl 12(%esp),%esi /* %esi = from */ 1227 movl 16(%esp),%edi /* %edi = to */ 1228 movl 20(%esp),%edx /* %edx = maxlen */ 1229 incl %edx 1230 cld 12311: 1232 decl %edx 1233 jz 4f 1234 lodsb 1235 stosb 1236 orb %al,%al 1237 jnz 1b 1238 1239 /* Success -- 0 byte reached */ 1240 decl %edx 1241 xorl %eax,%eax 1242 jmp 6f 12434: 1244 /* edx is zero -- return ENAMETOOLONG */ 1245 movl $ENAMETOOLONG,%eax 1246 12476: 1248 /* set *lencopied and return %eax */ 1249 movl 20(%esp),%ecx 1250 subl %edx,%ecx 1251 movl 24(%esp),%edx 1252 testl %edx,%edx 1253 jz 7f 1254 movl %ecx,(%edx) 12557: 1256 popl %edi 1257 popl %esi 1258 
ret 1259 1260ENTRY(bcmp) 1261 pushl %edi 1262 pushl %esi 1263 movl 12(%esp),%edi 1264 movl 16(%esp),%esi 1265 movl 20(%esp),%edx 1266 xorl %eax,%eax 1267 1268 movl %edx,%ecx 1269 shrl $2,%ecx 1270 cld /* compare forwards */ 1271 repe 1272 cmpsl 1273 jne 1f 1274 1275 movl %edx,%ecx 1276 andl $3,%ecx 1277 repe 1278 cmpsb 1279 je 2f 12801: 1281 incl %eax 12822: 1283 popl %esi 1284 popl %edi 1285 ret 1286 1287 1288/* 1289 * Handling of special 386 registers and descriptor tables etc 1290 */ 1291/* void lgdt(struct region_descriptor *rdp); */ 1292ENTRY(lgdt) 1293 /* reload the descriptor table */ 1294 movl 4(%esp),%eax 1295 lgdt (%eax) 1296 1297 /* flush the prefetch q */ 1298 jmp 1f 1299 nop 13001: 1301 /* reload "stale" selectors */ 1302 movl $KDSEL,%eax 1303 movl %ax,%ds 1304 movl %ax,%es 1305 movl %ax,%ss 1306 1307 /* reload code selector by turning return into intersegmental return */ 1308 movl (%esp),%eax 1309 pushl %eax 1310# movl $KCSEL,4(%esp) 1311 movl $8,4(%esp) 1312 lret 1313 1314/* 1315 * void lidt(struct region_descriptor *rdp); 1316 */ 1317ENTRY(lidt) 1318 movl 4(%esp),%eax 1319 lidt (%eax) 1320 ret 1321 1322/* 1323 * void lldt(u_short sel) 1324 */ 1325ENTRY(lldt) 1326 lldt 4(%esp) 1327 ret 1328 1329/* 1330 * void ltr(u_short sel) 1331 */ 1332ENTRY(ltr) 1333 ltr 4(%esp) 1334 ret 1335 1336/* ssdtosd(*ssdp,*sdp) */ 1337ENTRY(ssdtosd) 1338 pushl %ebx 1339 movl 8(%esp),%ecx 1340 movl 8(%ecx),%ebx 1341 shll $16,%ebx 1342 movl (%ecx),%edx 1343 roll $16,%edx 1344 movb %dh,%bl 1345 movb %dl,%bh 1346 rorl $8,%ebx 1347 movl 4(%ecx),%eax 1348 movw %ax,%dx 1349 andl $0xf0000,%eax 1350 orl %eax,%ebx 1351 movl 12(%esp),%ecx 1352 movl %edx,(%ecx) 1353 movl %ebx,4(%ecx) 1354 popl %ebx 1355 ret 1356 1357/* load_cr0(cr0) */ 1358ENTRY(load_cr0) 1359 movl 4(%esp),%eax 1360 movl %eax,%cr0 1361 ret 1362 1363/* rcr0() */ 1364ENTRY(rcr0) 1365 movl %cr0,%eax 1366 ret 1367 1368/* rcr3() */ 1369ENTRY(rcr3) 1370 movl %cr3,%eax 1371 ret 1372 1373/* void load_cr3(caddr_t cr3) */ 
1374ENTRY(load_cr3) 1375 movl 4(%esp),%eax 1376 movl %eax,%cr3 1377 ret 1378 1379 1380/*****************************************************************************/ 1381/* setjump, longjump */ 1382/*****************************************************************************/ 1383 1384ENTRY(setjmp) 1385 movl 4(%esp),%eax 1386 movl %ebx,(%eax) /* save ebx */ 1387 movl %esp,4(%eax) /* save esp */ 1388 movl %ebp,8(%eax) /* save ebp */ 1389 movl %esi,12(%eax) /* save esi */ 1390 movl %edi,16(%eax) /* save edi */ 1391 movl (%esp),%edx /* get rta */ 1392 movl %edx,20(%eax) /* save eip */ 1393 xorl %eax,%eax /* return(0); */ 1394 ret 1395 1396ENTRY(longjmp) 1397 movl 4(%esp),%eax 1398 movl (%eax),%ebx /* restore ebx */ 1399 movl 4(%eax),%esp /* restore esp */ 1400 movl 8(%eax),%ebp /* restore ebp */ 1401 movl 12(%eax),%esi /* restore esi */ 1402 movl 16(%eax),%edi /* restore edi */ 1403 movl 20(%eax),%edx /* get rta */ 1404 movl %edx,(%esp) /* put in return frame */ 1405 xorl %eax,%eax /* return(1); */ 1406 incl %eax 1407 ret 1408 1409/* 1410 * Here for doing BB-profiling (gcc -a). 1411 * We rely on the "bbset" instead, but need a dummy function. 1412 */ 1413NON_GPROF_ENTRY(__bb_init_func) 1414 movl 4(%esp),%eax 1415 movl $1,(%eax) 1416 .byte 0xc3 /* avoid macro for `ret' */ 1417