/* support.s, revision 18842 */
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
32 * 33 * $Id: support.s,v 1.40 1996/10/09 18:16:17 bde Exp $ 34 */ 35 36#include "opt_cpu.h" 37#include "opt_temporary.h" /* for I586_*_B* */ 38 39#include <machine/asmacros.h> 40#include <machine/cputypes.h> 41#include <machine/specialreg.h> 42 43#include "assym.s" 44 45#define KDSEL 0x10 /* kernel data selector */ 46#define IDXSHIFT 10 47 48 .data 49 .globl _bcopy_vector 50_bcopy_vector: 51 .long _generic_bcopy 52 .globl _bzero 53_bzero: 54 .long _generic_bzero 55 .globl _ovbcopy_vector 56_ovbcopy_vector: 57 .long _generic_bcopy 58kernel_fpu_lock: 59 .byte 0xfe 60 .space 3 61 62 .text 63 64/* 65 * bcopy family 66 * void bzero(void *buf, u_int len) 67 */ 68 69ENTRY(generic_bzero) 70 pushl %edi 71 movl 8(%esp),%edi 72 movl 12(%esp),%ecx 73 xorl %eax,%eax 74 shrl $2,%ecx 75 cld 76 rep 77 stosl 78 movl 12(%esp),%ecx 79 andl $3,%ecx 80 rep 81 stosb 82 popl %edi 83 ret 84 85#if defined(I486_CPU) 86ENTRY(i486_bzero) 87 movl 4(%esp),%edx 88 movl 8(%esp),%ecx 89 xorl %eax,%eax 90/* 91 * do 64 byte chunks first 92 * 93 * XXX this is probably over-unrolled at least for DX2's 94 */ 952: 96 cmpl $64,%ecx 97 jb 3f 98 movl %eax,(%edx) 99 movl %eax,4(%edx) 100 movl %eax,8(%edx) 101 movl %eax,12(%edx) 102 movl %eax,16(%edx) 103 movl %eax,20(%edx) 104 movl %eax,24(%edx) 105 movl %eax,28(%edx) 106 movl %eax,32(%edx) 107 movl %eax,36(%edx) 108 movl %eax,40(%edx) 109 movl %eax,44(%edx) 110 movl %eax,48(%edx) 111 movl %eax,52(%edx) 112 movl %eax,56(%edx) 113 movl %eax,60(%edx) 114 addl $64,%edx 115 subl $64,%ecx 116 jnz 2b 117 ret 118 119/* 120 * do 16 byte chunks 121 */ 122 SUPERALIGN_TEXT 1233: 124 cmpl $16,%ecx 125 jb 4f 126 movl %eax,(%edx) 127 movl %eax,4(%edx) 128 movl %eax,8(%edx) 129 movl %eax,12(%edx) 130 addl $16,%edx 131 subl $16,%ecx 132 jnz 3b 133 ret 134 135/* 136 * do 4 byte chunks 137 */ 138 SUPERALIGN_TEXT 1394: 140 cmpl $4,%ecx 141 jb 5f 142 movl %eax,(%edx) 143 addl $4,%edx 144 subl $4,%ecx 145 jnz 4b 146 ret 147 148/* 149 * do 1 byte chunks 150 * a jump table 
seems to be faster than a loop or more range reductions 151 * 152 * XXX need a const section for non-text 153 */ 154 .data 155jtab: 156 .long do0 157 .long do1 158 .long do2 159 .long do3 160 161 .text 162 SUPERALIGN_TEXT 1635: 164 jmp jtab(,%ecx,4) 165 166 SUPERALIGN_TEXT 167do3: 168 movw %ax,(%edx) 169 movb %al,2(%edx) 170 ret 171 172 SUPERALIGN_TEXT 173do2: 174 movw %ax,(%edx) 175 ret 176 177 SUPERALIGN_TEXT 178do1: 179 movb %al,(%edx) 180 ret 181 182 SUPERALIGN_TEXT 183do0: 184 ret 185#endif 186 187#if defined(I586_CPU) || defined(I686_CPU) 188ENTRY(i586_bzero) 189 movl 4(%esp),%edx 190 movl 8(%esp),%ecx 191 192 /* 193 * The FPU register method is twice as fast as the integer register 194 * method unless the target is in the L1 cache and we pre-allocate a 195 * cache line for it (then the integer register method is 4-5 times 196 * faster). However, we never pre-allocate cache lines, since that 197 * would make the integer method 25% or more slower for the common 198 * case when the target isn't in either the L1 cache or the L2 cache. 199 * Thus we normally use the FPU register method unless the overhead 200 * would be too large. 201 */ 202 cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ 203 jb intreg_i586_bzero 204 205 /* 206 * The FPU registers may belong to an application or to fastmove() 207 * or to another invocation of bcopy() or ourself in a higher level 208 * interrupt or trap handler. Preserving the registers is 209 * complicated since we avoid it if possible at all levels. We 210 * want to localize the complications even when that increases them. 211 * Here the extra work involves preserving CR0_TS in TS. 212 * `npxproc != NULL' is supposed to be the condition that all the 213 * FPU resources belong to an application, but npxproc and CR0_TS 214 * aren't set atomically enough for this condition to work in 215 * interrupt handlers. 
216 * 217 * Case 1: FPU registers belong to the application: we must preserve 218 * the registers if we use them, so we only use the FPU register 219 * method if the target size is large enough to amortize the extra 220 * overhead for preserving them. CR0_TS must be preserved although 221 * it is very likely to end up as set. 222 * 223 * Case 2: FPU registers belong to fastmove(): fastmove() currently 224 * makes the registers look like they belong to an application so 225 * that cpu_switch() and savectx() don't have to know about it, so 226 * this case reduces to case 1. 227 * 228 * Case 3: FPU registers belong to the kernel: don't use the FPU 229 * register method. This case is unlikely, and supporting it would 230 * be more complicated and might take too much stack. 231 * 232 * Case 4: FPU registers don't belong to anyone: the FPU registers 233 * don't need to be preserved, so we always use the FPU register 234 * method. CR0_TS must be preserved although it is very likely to 235 * always end up as clear. 236 */ 237 cmpl $0,_npxproc 238 je i586_bz1 239 cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ 240 jb intreg_i586_bzero 241 sarb $1,kernel_fpu_lock 242 jc intreg_i586_bzero 243 smsw %ax 244 clts 245 subl $108,%esp 246 fnsave 0(%esp) 247 jmp i586_bz2 248 249i586_bz1: 250 sarb $1,kernel_fpu_lock 251 jc intreg_i586_bzero 252 smsw %ax 253 clts 254 fninit /* XXX should avoid needing this */ 255i586_bz2: 256 fldz 257 258 /* 259 * Align to an 8 byte boundary (misalignment in the main loop would 260 * cost a factor of >= 2). Avoid jumps (at little cost if it is 261 * already aligned) by always zeroing 8 bytes and using the part up 262 * to the _next_ alignment position. 263 */ 264 fstl 0(%edx) 265 addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ 266 addl $8,%edx 267 andl $~7,%edx 268 subl %edx,%ecx 269 270 /* 271 * Similarly align `len' to a multiple of 8. 
272 */ 273 fstl -8(%edx,%ecx) 274 decl %ecx 275 andl $~7,%ecx 276 277 /* 278 * This wouldn't be any faster if it were unrolled, since the loop 279 * control instructions are much faster than the fstl and/or done 280 * in parallel with it so their overhead is insignificant. 281 */ 282fpureg_i586_bzero_loop: 283 fstl 0(%edx) 284 addl $8,%edx 285 subl $8,%ecx 286 cmpl $8,%ecx 287 jae fpureg_i586_bzero_loop 288 289 cmpl $0,_npxproc 290 je i586_bz3 291 frstor 0(%esp) 292 addl $108,%esp 293 lmsw %ax 294 movb $0xfe,kernel_fpu_lock 295 ret 296 297i586_bz3: 298 fstpl %st(0) 299 lmsw %ax 300 movb $0xfe,kernel_fpu_lock 301 ret 302 303intreg_i586_bzero: 304 /* 305 * `rep stos' seems to be the best method in practice for small 306 * counts. Fancy methods usually take too long to start up due 307 * to cache and BTB misses. 308 */ 309 pushl %edi 310 movl %edx,%edi 311 xorl %eax,%eax 312 shrl $2,%ecx 313 cld 314 rep 315 stosl 316 movl 12(%esp),%ecx 317 andl $3,%ecx 318 jne 1f 319 popl %edi 320 ret 321 3221: 323 rep 324 stosb 325 popl %edi 326 ret 327#endif /* I586_CPU || I686_CPU */ 328 329/* fillw(pat, base, cnt) */ 330ENTRY(fillw) 331 pushl %edi 332 movl 8(%esp),%eax 333 movl 12(%esp),%edi 334 movl 16(%esp),%ecx 335 cld 336 rep 337 stosw 338 popl %edi 339 ret 340 341ENTRY(bcopyb) 342bcopyb: 343 pushl %esi 344 pushl %edi 345 movl 12(%esp),%esi 346 movl 16(%esp),%edi 347 movl 20(%esp),%ecx 348 movl %edi,%eax 349 subl %esi,%eax 350 cmpl %ecx,%eax /* overlapping && src < dst? */ 351 jb 1f 352 cld /* nope, copy forwards */ 353 rep 354 movsb 355 popl %edi 356 popl %esi 357 ret 358 359 ALIGN_TEXT 3601: 361 addl %ecx,%edi /* copy backwards. 
*/ 362 addl %ecx,%esi 363 decl %edi 364 decl %esi 365 std 366 rep 367 movsb 368 popl %edi 369 popl %esi 370 cld 371 ret 372 373ENTRY(bcopy) 374 MEXITCOUNT 375 jmp *_bcopy_vector 376 377ENTRY(ovbcopy) 378 MEXITCOUNT 379 jmp *_ovbcopy_vector 380 381/* 382 * generic_bcopy(src, dst, cnt) 383 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 384 */ 385ENTRY(generic_bcopy) 386 pushl %esi 387 pushl %edi 388 movl 12(%esp),%esi 389 movl 16(%esp),%edi 390 movl 20(%esp),%ecx 391 392 movl %edi,%eax 393 subl %esi,%eax 394 cmpl %ecx,%eax /* overlapping && src < dst? */ 395 jb 1f 396 397 shrl $2,%ecx /* copy by 32-bit words */ 398 cld /* nope, copy forwards */ 399 rep 400 movsl 401 movl 20(%esp),%ecx 402 andl $3,%ecx /* any bytes left? */ 403 rep 404 movsb 405 popl %edi 406 popl %esi 407 ret 408 409 ALIGN_TEXT 4101: 411 addl %ecx,%edi /* copy backwards */ 412 addl %ecx,%esi 413 decl %edi 414 decl %esi 415 andl $3,%ecx /* any fractional bytes? */ 416 std 417 rep 418 movsb 419 movl 20(%esp),%ecx /* copy remainder by 32-bit words */ 420 shrl $2,%ecx 421 subl $3,%esi 422 subl $3,%edi 423 rep 424 movsl 425 popl %edi 426 popl %esi 427 cld 428 ret 429 430ENTRY(i586_bcopy) 431 pushl %esi 432 pushl %edi 433 movl 12(%esp),%esi 434 movl 16(%esp),%edi 435 movl 20(%esp),%ecx 436 437 movl %edi,%eax 438 subl %esi,%eax 439 cmpl %ecx,%eax /* overlapping && src < dst? 
*/ 440 jb 1f 441 442 cmpl $1024,%ecx 443 jb small_i586_bcopy 444 445 sarb $1,kernel_fpu_lock 446 jc small_i586_bcopy 447 cmpl $0,_npxproc 448 je i586_bc1 449 smsw %dx 450 clts 451 subl $108,%esp 452 fnsave 0(%esp) 453 jmp 4f 454 455i586_bc1: 456 smsw %dx 457 clts 458 fninit /* XXX should avoid needing this */ 459 460 ALIGN_TEXT 4614: 462 pushl %ecx 463#define DCACHE_SIZE 8192 464 cmpl $(DCACHE_SIZE-512)/2,%ecx 465 jbe 2f 466 movl $(DCACHE_SIZE-512)/2,%ecx 4672: 468 subl %ecx,0(%esp) 469 cmpl $256,%ecx 470 jb 5f /* XXX should prefetch if %ecx >= 32 */ 471 pushl %esi 472 pushl %ecx 473 ALIGN_TEXT 4743: 475 movl 0(%esi),%eax 476 movl 32(%esi),%eax 477 movl 64(%esi),%eax 478 movl 96(%esi),%eax 479 movl 128(%esi),%eax 480 movl 160(%esi),%eax 481 movl 192(%esi),%eax 482 movl 224(%esi),%eax 483 addl $256,%esi 484 subl $256,%ecx 485 cmpl $256,%ecx 486 jae 3b 487 popl %ecx 488 popl %esi 4895: 490 ALIGN_TEXT 491large_i586_bcopy_loop: 492 fildq 0(%esi) 493 fildq 8(%esi) 494 fildq 16(%esi) 495 fildq 24(%esi) 496 fildq 32(%esi) 497 fildq 40(%esi) 498 fildq 48(%esi) 499 fildq 56(%esi) 500 fistpq 56(%edi) 501 fistpq 48(%edi) 502 fistpq 40(%edi) 503 fistpq 32(%edi) 504 fistpq 24(%edi) 505 fistpq 16(%edi) 506 fistpq 8(%edi) 507 fistpq 0(%edi) 508 addl $64,%esi 509 addl $64,%edi 510 subl $64,%ecx 511 cmpl $64,%ecx 512 jae large_i586_bcopy_loop 513 popl %eax 514 addl %eax,%ecx 515 cmpl $64,%ecx 516 jae 4b 517 518 cmpl $0,_npxproc 519 je i586_bc2 520 frstor 0(%esp) 521 addl $108,%esp 522i586_bc2: 523 lmsw %dx 524 movb $0xfe,kernel_fpu_lock 525 526/* 527 * This is a duplicate of the main part of generic_bcopy. See the comments 528 * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and 529 * would mess up high resolution profiling. 
530 */ 531 ALIGN_TEXT 532small_i586_bcopy: 533 shrl $2,%ecx 534 cld 535 rep 536 movsl 537 movl 20(%esp),%ecx 538 andl $3,%ecx 539 rep 540 movsb 541 popl %edi 542 popl %esi 543 ret 544 545 ALIGN_TEXT 5461: 547 addl %ecx,%edi 548 addl %ecx,%esi 549 decl %edi 550 decl %esi 551 andl $3,%ecx 552 std 553 rep 554 movsb 555 movl 20(%esp),%ecx 556 shrl $2,%ecx 557 subl $3,%esi 558 subl $3,%edi 559 rep 560 movsl 561 popl %edi 562 popl %esi 563 cld 564 ret 565 566/* 567 * Note: memcpy does not support overlapping copies 568 */ 569ENTRY(memcpy) 570 pushl %edi 571 pushl %esi 572 movl 12(%esp),%edi 573 movl 16(%esp),%esi 574 movl 20(%esp),%ecx 575 movl %edi,%eax 576 shrl $2,%ecx /* copy by 32-bit words */ 577 cld /* nope, copy forwards */ 578 rep 579 movsl 580 movl 20(%esp),%ecx 581 andl $3,%ecx /* any bytes left? */ 582 rep 583 movsb 584 popl %esi 585 popl %edi 586 ret 587 588 589/*****************************************************************************/ 590/* copyout and fubyte family */ 591/*****************************************************************************/ 592/* 593 * Access user memory from inside the kernel. These routines and possibly 594 * the math- and DOS emulators should be the only places that do this. 595 * 596 * We have to access the memory with user's permissions, so use a segment 597 * selector with RPL 3. For writes to user space we have to additionally 598 * check the PTE for write permission, because the 386 does not check 599 * write permissions when we are executing with EPL 0. The 486 does check 600 * this if the WP bit is set in CR0, so we can use a simpler version here. 601 * 602 * These routines set curpcb->onfault for the time they execute. When a 603 * protection violation occurs inside the functions, the trap handler 604 * returns to *curpcb->onfault instead of the function. 
605 */ 606 607 608ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ 609 movl _curpcb,%eax 610 movl $copyout_fault,PCB_ONFAULT(%eax) 611 pushl %esi 612 pushl %edi 613 pushl %ebx 614 movl 16(%esp),%esi 615 movl 20(%esp),%edi 616 movl 24(%esp),%ebx 617 testl %ebx,%ebx /* anything to do? */ 618 jz done_copyout 619 620 /* 621 * Check explicitly for non-user addresses. If 486 write protection 622 * is being used, this check is essential because we are in kernel 623 * mode so the h/w does not provide any protection against writing 624 * kernel addresses. 625 */ 626 627 /* 628 * First, prevent address wrapping. 629 */ 630 movl %edi,%eax 631 addl %ebx,%eax 632 jc copyout_fault 633/* 634 * XXX STOP USING VM_MAXUSER_ADDRESS. 635 * It is an end address, not a max, so every time it is used correctly it 636 * looks like there is an off by one error, and of course it caused an off 637 * by one error in several places. 638 */ 639 cmpl $VM_MAXUSER_ADDRESS,%eax 640 ja copyout_fault 641 642#if defined(I386_CPU) 643 644#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 645 cmpl $CPUCLASS_386,_cpu_class 646 jne 3f 647#endif 648/* 649 * We have to check each PTE for user write permission. 650 * The checking may cause a page fault, so it is important to set 651 * up everything for return via copyout_fault before here. 
652 */ 653 /* compute number of pages */ 654 movl %edi,%ecx 655 andl $PAGE_MASK,%ecx 656 addl %ebx,%ecx 657 decl %ecx 658 shrl $IDXSHIFT+2,%ecx 659 incl %ecx 660 661 /* compute PTE offset for start address */ 662 movl %edi,%edx 663 shrl $IDXSHIFT,%edx 664 andb $0xfc,%dl 665 6661: /* check PTE for each page */ 667 movb _PTmap(%edx),%al 668 andb $0x07,%al /* Pages must be VALID + USERACC + WRITABLE */ 669 cmpb $0x07,%al 670 je 2f 671 672 /* simulate a trap */ 673 pushl %edx 674 pushl %ecx 675 shll $IDXSHIFT,%edx 676 pushl %edx 677 call _trapwrite /* trapwrite(addr) */ 678 popl %edx 679 popl %ecx 680 popl %edx 681 682 testl %eax,%eax /* if not ok, return EFAULT */ 683 jnz copyout_fault 684 6852: 686 addl $4,%edx 687 decl %ecx 688 jnz 1b /* check next page */ 689#endif /* I386_CPU */ 690 691 /* bcopy(%esi, %edi, %ebx) */ 6923: 693 movl %ebx,%ecx 694#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 695 cmpl $1024,%ecx 696 jb slow_copyout 697 698#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) 699 cmpl $CPUCLASS_586,_cpu_class 700 jne slow_copyout 701#endif /* I386_CPU || I486_CPU || I686_CPU */ 702 703 pushl %ecx 704 call _fastmove 705 addl $4,%esp 706 jmp done_copyout 707 708 ALIGN_TEXT 709slow_copyout: 710#endif /* I586_CPU && I586_FAST_BCOPY */ 711 shrl $2,%ecx 712 cld 713 rep 714 movsl 715 movb %bl,%cl 716 andb $3,%cl 717 rep 718 movsb 719 720done_copyout: 721 popl %ebx 722 popl %edi 723 popl %esi 724 xorl %eax,%eax 725 movl _curpcb,%edx 726 movl %eax,PCB_ONFAULT(%edx) 727 ret 728 729 ALIGN_TEXT 730copyout_fault: 731 popl %ebx 732 popl %edi 733 popl %esi 734 movl _curpcb,%edx 735 movl $0,PCB_ONFAULT(%edx) 736 movl $EFAULT,%eax 737 ret 738 739/* copyin(from_user, to_kernel, len) */ 740ENTRY(copyin) 741 movl _curpcb,%eax 742 movl $copyin_fault,PCB_ONFAULT(%eax) 743 pushl %esi 744 pushl %edi 745 movl 12(%esp),%esi /* caddr_t from */ 746 movl 16(%esp),%edi /* caddr_t to */ 747 movl 20(%esp),%ecx /* size_t len */ 748 749 /* 750 * make sure address is valid 
751 */ 752 movl %esi,%edx 753 addl %ecx,%edx 754 jc copyin_fault 755 cmpl $VM_MAXUSER_ADDRESS,%edx 756 ja copyin_fault 757 758#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 759 cmpl $1024,%ecx 760 jb slow_copyin 761 762#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) 763 cmpl $CPUCLASS_586,_cpu_class 764 jne slow_copyin 765#endif /* I386_CPU || I486_CPU || I686_CPU */ 766 767 pushl %ecx 768 call _fastmove 769 addl $4,%esp 770 jmp done_copyin 771 772 ALIGN_TEXT 773slow_copyin: 774#endif /* I586_CPU && I586_FAST_BCOPY */ 775 movb %cl,%al 776 shrl $2,%ecx /* copy longword-wise */ 777 cld 778 rep 779 movsl 780 movb %al,%cl 781 andb $3,%cl /* copy remaining bytes */ 782 rep 783 movsb 784 785#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 786 ALIGN_TEXT 787done_copyin: 788#endif /* I586_CPU && I586_FAST_BCOPY */ 789 popl %edi 790 popl %esi 791 xorl %eax,%eax 792 movl _curpcb,%edx 793 movl %eax,PCB_ONFAULT(%edx) 794 ret 795 796 ALIGN_TEXT 797copyin_fault: 798 popl %edi 799 popl %esi 800 movl _curpcb,%edx 801 movl $0,PCB_ONFAULT(%edx) 802 movl $EFAULT,%eax 803 ret 804 805#if defined(I586_CPU) && defined(I586_FAST_BCOPY) 806/* fastmove(src, dst, len) 807 src in %esi 808 dst in %edi 809 len in %ecx XXX changed to on stack for profiling 810 uses %eax and %edx for tmp. storage 811 */ 812/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ 813ENTRY(fastmove) 814 movl 4(%esp),%ecx 815 cmpl $63,%ecx 816 jbe fastmove_tail 817 818 testl $7,%esi /* check if src addr is multiple of 8 */ 819 jnz fastmove_tail 820 821 testl $7,%edi /* check if dst addr is multiple of 8 */ 822 jnz fastmove_tail 823 824 pushl %ebp 825 movl %esp,%ebp 826 subl $PCB_SAVEFPU_SIZE,%esp 827 828/* if (npxproc != NULL) { */ 829 cmpl $0,_npxproc 830 je 6f 831/* fnsave(&curpcb->pcb_savefpu); */ 832 movl _curpcb,%eax 833 fnsave PCB_SAVEFPU(%eax) 834/* npxproc = NULL; */ 835 movl $0,_npxproc 836/* } */ 8376: 838/* now we own the FPU. 
*/ 839 840/* 841 * The process' FP state is saved in the pcb, but if we get 842 * switched, the cpu_switch() will store our FP state in the 843 * pcb. It should be possible to avoid all the copying for 844 * this, e.g., by setting a flag to tell cpu_switch() to 845 * save the state somewhere else. 846 */ 847/* tmp = curpcb->pcb_savefpu; */ 848 pushl %edi 849 pushl %esi 850 pushl %ecx 851 leal -PCB_SAVEFPU_SIZE(%ebp),%edi 852 movl _curpcb,%esi 853 addl $PCB_SAVEFPU,%esi 854 cld 855 movl $PCB_SAVEFPU_SIZE>>2,%ecx 856 rep 857 movsl 858 popl %ecx 859 popl %esi 860 popl %edi 861/* stop_emulating(); */ 862 clts 863/* npxproc = curproc; */ 864 movl _curproc,%eax 865 movl %eax,_npxproc 8664: 867 pushl %ecx 868 cmpl $1792,%ecx 869 jbe 2f 870 movl $1792,%ecx 8712: 872 subl %ecx,0(%esp) 873 cmpl $256,%ecx 874 jb 5f 875 pushl %esi 876 pushl %ecx 877 ALIGN_TEXT 8783: 879 movl 0(%esi),%eax 880 movl 32(%esi),%eax 881 movl 64(%esi),%eax 882 movl 96(%esi),%eax 883 movl 128(%esi),%eax 884 movl 160(%esi),%eax 885 movl 192(%esi),%eax 886 movl 224(%esi),%eax 887 addl $256,%esi 888 subl $256,%ecx 889 cmpl $256,%ecx 890 jae 3b 891 popl %ecx 892 popl %esi 8935: 894 ALIGN_TEXT 895fastmove_loop: 896 fildq 0(%esi) 897 fildq 8(%esi) 898 fildq 16(%esi) 899 fildq 24(%esi) 900 fildq 32(%esi) 901 fildq 40(%esi) 902 fildq 48(%esi) 903 fildq 56(%esi) 904 fistpq 56(%edi) 905 fistpq 48(%edi) 906 fistpq 40(%edi) 907 fistpq 32(%edi) 908 fistpq 24(%edi) 909 fistpq 16(%edi) 910 fistpq 8(%edi) 911 fistpq 0(%edi) 912 addl $-64,%ecx 913 addl $64,%esi 914 addl $64,%edi 915 cmpl $63,%ecx 916 ja fastmove_loop 917 popl %eax 918 addl %eax,%ecx 919 cmpl $64,%ecx 920 jae 4b 921 922/* curpcb->pcb_savefpu = tmp; */ 923 pushl %edi 924 pushl %esi 925 pushl %ecx 926 movl _curpcb,%edi 927 addl $PCB_SAVEFPU,%edi 928 leal -PCB_SAVEFPU_SIZE(%ebp),%esi 929 cld 930 movl $PCB_SAVEFPU_SIZE>>2,%ecx 931 rep 932 movsl 933 popl %ecx 934 popl %esi 935 popl %edi 936 937/* start_emulating(); */ 938 smsw %ax 939 orb $CR0_TS,%al 940 
lmsw %ax 941/* npxproc = NULL; */ 942 movl $0,_npxproc 943 movl %ebp,%esp 944 popl %ebp 945 946 ALIGN_TEXT 947fastmove_tail: 948 movb %cl,%al 949 shrl $2,%ecx /* copy longword-wise */ 950 cld 951 rep 952 movsl 953 movb %al,%cl 954 andb $3,%cl /* copy remaining bytes */ 955 rep 956 movsb 957 958 ret 959#endif /* I586_CPU && I586_FAST_BCOPY */ 960 961/* 962 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory 963 */ 964ENTRY(fuword) 965 movl _curpcb,%ecx 966 movl $fusufault,PCB_ONFAULT(%ecx) 967 movl 4(%esp),%edx /* from */ 968 969 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ 970 ja fusufault 971 972 movl (%edx),%eax 973 movl $0,PCB_ONFAULT(%ecx) 974 ret 975 976/* 977 * These two routines are called from the profiling code, potentially 978 * at interrupt time. If they fail, that's okay, good things will 979 * happen later. Fail all the time for now - until the trap code is 980 * able to deal with this. 981 */ 982ALTENTRY(suswintr) 983ENTRY(fuswintr) 984 movl $-1,%eax 985 ret 986 987ENTRY(fusword) 988 movl _curpcb,%ecx 989 movl $fusufault,PCB_ONFAULT(%ecx) 990 movl 4(%esp),%edx 991 992 cmpl $VM_MAXUSER_ADDRESS-2,%edx 993 ja fusufault 994 995 movzwl (%edx),%eax 996 movl $0,PCB_ONFAULT(%ecx) 997 ret 998 999ENTRY(fubyte) 1000 movl _curpcb,%ecx 1001 movl $fusufault,PCB_ONFAULT(%ecx) 1002 movl 4(%esp),%edx 1003 1004 cmpl $VM_MAXUSER_ADDRESS-1,%edx 1005 ja fusufault 1006 1007 movzbl (%edx),%eax 1008 movl $0,PCB_ONFAULT(%ecx) 1009 ret 1010 1011 ALIGN_TEXT 1012fusufault: 1013 movl _curpcb,%ecx 1014 xorl %eax,%eax 1015 movl %eax,PCB_ONFAULT(%ecx) 1016 decl %eax 1017 ret 1018 1019/* 1020 * su{byte,sword,word}: write a byte (word, longword) to user memory 1021 */ 1022ENTRY(suword) 1023 movl _curpcb,%ecx 1024 movl $fusufault,PCB_ONFAULT(%ecx) 1025 movl 4(%esp),%edx 1026 1027#if defined(I386_CPU) 1028 1029#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 1030 cmpl $CPUCLASS_386,_cpu_class 1031 jne 2f /* we only have to set the right 
segment selector */ 1032#endif /* I486_CPU || I586_CPU || I686_CPU */ 1033 1034 /* XXX - page boundary crossing is still not handled */ 1035 movl %edx,%eax 1036 shrl $IDXSHIFT,%edx 1037 andb $0xfc,%dl 1038 movb _PTmap(%edx),%dl 1039 andb $0x7,%dl /* must be VALID + USERACC + WRITE */ 1040 cmpb $0x7,%dl 1041 je 1f 1042 1043 /* simulate a trap */ 1044 pushl %eax 1045 call _trapwrite 1046 popl %edx /* remove junk parameter from stack */ 1047 movl _curpcb,%ecx /* restore trashed register */ 1048 testl %eax,%eax 1049 jnz fusufault 10501: 1051 movl 4(%esp),%edx 1052#endif 1053 10542: 1055 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ 1056 ja fusufault 1057 1058 movl 8(%esp),%eax 1059 movl %eax,(%edx) 1060 xorl %eax,%eax 1061 movl %eax,PCB_ONFAULT(%ecx) 1062 ret 1063 1064ENTRY(susword) 1065 movl _curpcb,%ecx 1066 movl $fusufault,PCB_ONFAULT(%ecx) 1067 movl 4(%esp),%edx 1068 1069#if defined(I386_CPU) 1070 1071#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 1072 cmpl $CPUCLASS_386,_cpu_class 1073 jne 2f 1074#endif /* I486_CPU || I586_CPU || I686_CPU */ 1075 1076 /* XXX - page boundary crossing is still not handled */ 1077 movl %edx,%eax 1078 shrl $IDXSHIFT,%edx 1079 andb $0xfc,%dl 1080 movb _PTmap(%edx),%dl 1081 andb $0x7,%dl /* must be VALID + USERACC + WRITE */ 1082 cmpb $0x7,%dl 1083 je 1f 1084 1085 /* simulate a trap */ 1086 pushl %eax 1087 call _trapwrite 1088 popl %edx /* remove junk parameter from stack */ 1089 movl _curpcb,%ecx /* restore trashed register */ 1090 testl %eax,%eax 1091 jnz fusufault 10921: 1093 movl 4(%esp),%edx 1094#endif 1095 10962: 1097 cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ 1098 ja fusufault 1099 1100 movw 8(%esp),%ax 1101 movw %ax,(%edx) 1102 xorl %eax,%eax 1103 movl %eax,PCB_ONFAULT(%ecx) 1104 ret 1105 1106ALTENTRY(suibyte) 1107ENTRY(subyte) 1108 movl _curpcb,%ecx 1109 movl $fusufault,PCB_ONFAULT(%ecx) 1110 movl 4(%esp),%edx 1111 1112#if defined(I386_CPU) 1113 1114#if defined(I486_CPU) || 
defined(I586_CPU) || defined(I686_CPU) 1115 cmpl $CPUCLASS_386,_cpu_class 1116 jne 2f 1117#endif /* I486_CPU || I586_CPU || I686_CPU */ 1118 1119 movl %edx,%eax 1120 shrl $IDXSHIFT,%edx 1121 andb $0xfc,%dl 1122 movb _PTmap(%edx),%dl 1123 andb $0x7,%dl /* must be VALID + USERACC + WRITE */ 1124 cmpb $0x7,%dl 1125 je 1f 1126 1127 /* simulate a trap */ 1128 pushl %eax 1129 call _trapwrite 1130 popl %edx /* remove junk parameter from stack */ 1131 movl _curpcb,%ecx /* restore trashed register */ 1132 testl %eax,%eax 1133 jnz fusufault 11341: 1135 movl 4(%esp),%edx 1136#endif 1137 11382: 1139 cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ 1140 ja fusufault 1141 1142 movb 8(%esp),%al 1143 movb %al,(%edx) 1144 xorl %eax,%eax 1145 movl %eax,PCB_ONFAULT(%ecx) 1146 ret 1147 1148/* 1149 * copyinstr(from, to, maxlen, int *lencopied) 1150 * copy a string from from to to, stop when a 0 character is reached. 1151 * return ENAMETOOLONG if string is longer than maxlen, and 1152 * EFAULT on protection violations. If lencopied is non-zero, 1153 * return the actual length in *lencopied. 
1154 */ 1155ENTRY(copyinstr) 1156 pushl %esi 1157 pushl %edi 1158 movl _curpcb,%ecx 1159 movl $cpystrflt,PCB_ONFAULT(%ecx) 1160 1161 movl 12(%esp),%esi /* %esi = from */ 1162 movl 16(%esp),%edi /* %edi = to */ 1163 movl 20(%esp),%edx /* %edx = maxlen */ 1164 1165 movl $VM_MAXUSER_ADDRESS,%eax 1166 1167 /* make sure 'from' is within bounds */ 1168 subl %esi,%eax 1169 jbe cpystrflt 1170 1171 /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ 1172 cmpl %edx,%eax 1173 jae 1f 1174 movl %eax,%edx 1175 movl %eax,20(%esp) 11761: 1177 incl %edx 1178 cld 1179 11802: 1181 decl %edx 1182 jz 3f 1183 1184 lodsb 1185 stosb 1186 orb %al,%al 1187 jnz 2b 1188 1189 /* Success -- 0 byte reached */ 1190 decl %edx 1191 xorl %eax,%eax 1192 jmp cpystrflt_x 11933: 1194 /* edx is zero - return ENAMETOOLONG or EFAULT */ 1195 cmpl $VM_MAXUSER_ADDRESS,%esi 1196 jae cpystrflt 11974: 1198 movl $ENAMETOOLONG,%eax 1199 jmp cpystrflt_x 1200 1201cpystrflt: 1202 movl $EFAULT,%eax 1203 1204cpystrflt_x: 1205 /* set *lencopied and return %eax */ 1206 movl _curpcb,%ecx 1207 movl $0,PCB_ONFAULT(%ecx) 1208 movl 20(%esp),%ecx 1209 subl %edx,%ecx 1210 movl 24(%esp),%edx 1211 testl %edx,%edx 1212 jz 1f 1213 movl %ecx,(%edx) 12141: 1215 popl %edi 1216 popl %esi 1217 ret 1218 1219 1220/* 1221 * copystr(from, to, maxlen, int *lencopied) 1222 */ 1223ENTRY(copystr) 1224 pushl %esi 1225 pushl %edi 1226 1227 movl 12(%esp),%esi /* %esi = from */ 1228 movl 16(%esp),%edi /* %edi = to */ 1229 movl 20(%esp),%edx /* %edx = maxlen */ 1230 incl %edx 1231 cld 12321: 1233 decl %edx 1234 jz 4f 1235 lodsb 1236 stosb 1237 orb %al,%al 1238 jnz 1b 1239 1240 /* Success -- 0 byte reached */ 1241 decl %edx 1242 xorl %eax,%eax 1243 jmp 6f 12444: 1245 /* edx is zero -- return ENAMETOOLONG */ 1246 movl $ENAMETOOLONG,%eax 1247 12486: 1249 /* set *lencopied and return %eax */ 1250 movl 20(%esp),%ecx 1251 subl %edx,%ecx 1252 movl 24(%esp),%edx 1253 testl %edx,%edx 1254 jz 7f 1255 movl %ecx,(%edx) 12567: 1257 popl %edi 1258 popl %esi 1259 
ret 1260 1261ENTRY(bcmp) 1262 pushl %edi 1263 pushl %esi 1264 movl 12(%esp),%edi 1265 movl 16(%esp),%esi 1266 movl 20(%esp),%edx 1267 xorl %eax,%eax 1268 1269 movl %edx,%ecx 1270 shrl $2,%ecx 1271 cld /* compare forwards */ 1272 repe 1273 cmpsl 1274 jne 1f 1275 1276 movl %edx,%ecx 1277 andl $3,%ecx 1278 repe 1279 cmpsb 1280 je 2f 12811: 1282 incl %eax 12832: 1284 popl %esi 1285 popl %edi 1286 ret 1287 1288 1289/* 1290 * Handling of special 386 registers and descriptor tables etc 1291 */ 1292/* void lgdt(struct region_descriptor *rdp); */ 1293ENTRY(lgdt) 1294 /* reload the descriptor table */ 1295 movl 4(%esp),%eax 1296 lgdt (%eax) 1297 1298 /* flush the prefetch q */ 1299 jmp 1f 1300 nop 13011: 1302 /* reload "stale" selectors */ 1303 movl $KDSEL,%eax 1304 movl %ax,%ds 1305 movl %ax,%es 1306 movl %ax,%ss 1307 1308 /* reload code selector by turning return into intersegmental return */ 1309 movl (%esp),%eax 1310 pushl %eax 1311# movl $KCSEL,4(%esp) 1312 movl $8,4(%esp) 1313 lret 1314 1315/* 1316 * void lidt(struct region_descriptor *rdp); 1317 */ 1318ENTRY(lidt) 1319 movl 4(%esp),%eax 1320 lidt (%eax) 1321 ret 1322 1323/* 1324 * void lldt(u_short sel) 1325 */ 1326ENTRY(lldt) 1327 lldt 4(%esp) 1328 ret 1329 1330/* 1331 * void ltr(u_short sel) 1332 */ 1333ENTRY(ltr) 1334 ltr 4(%esp) 1335 ret 1336 1337/* ssdtosd(*ssdp,*sdp) */ 1338ENTRY(ssdtosd) 1339 pushl %ebx 1340 movl 8(%esp),%ecx 1341 movl 8(%ecx),%ebx 1342 shll $16,%ebx 1343 movl (%ecx),%edx 1344 roll $16,%edx 1345 movb %dh,%bl 1346 movb %dl,%bh 1347 rorl $8,%ebx 1348 movl 4(%ecx),%eax 1349 movw %ax,%dx 1350 andl $0xf0000,%eax 1351 orl %eax,%ebx 1352 movl 12(%esp),%ecx 1353 movl %edx,(%ecx) 1354 movl %ebx,4(%ecx) 1355 popl %ebx 1356 ret 1357 1358/* load_cr0(cr0) */ 1359ENTRY(load_cr0) 1360 movl 4(%esp),%eax 1361 movl %eax,%cr0 1362 ret 1363 1364/* rcr0() */ 1365ENTRY(rcr0) 1366 movl %cr0,%eax 1367 ret 1368 1369/* rcr3() */ 1370ENTRY(rcr3) 1371 movl %cr3,%eax 1372 ret 1373 1374/* void load_cr3(caddr_t cr3) */ 
1375ENTRY(load_cr3) 1376 movl 4(%esp),%eax 1377 movl %eax,%cr3 1378 ret 1379 1380 1381/*****************************************************************************/ 1382/* setjump, longjump */ 1383/*****************************************************************************/ 1384 1385ENTRY(setjmp) 1386 movl 4(%esp),%eax 1387 movl %ebx,(%eax) /* save ebx */ 1388 movl %esp,4(%eax) /* save esp */ 1389 movl %ebp,8(%eax) /* save ebp */ 1390 movl %esi,12(%eax) /* save esi */ 1391 movl %edi,16(%eax) /* save edi */ 1392 movl (%esp),%edx /* get rta */ 1393 movl %edx,20(%eax) /* save eip */ 1394 xorl %eax,%eax /* return(0); */ 1395 ret 1396 1397ENTRY(longjmp) 1398 movl 4(%esp),%eax 1399 movl (%eax),%ebx /* restore ebx */ 1400 movl 4(%eax),%esp /* restore esp */ 1401 movl 8(%eax),%ebp /* restore ebp */ 1402 movl 12(%eax),%esi /* restore esi */ 1403 movl 16(%eax),%edi /* restore edi */ 1404 movl 20(%eax),%edx /* get rta */ 1405 movl %edx,(%esp) /* put in return frame */ 1406 xorl %eax,%eax /* return(1); */ 1407 incl %eax 1408 ret 1409 1410/* 1411 * Here for doing BB-profiling (gcc -a). 1412 * We rely on the "bbset" instead, but need a dummy function. 1413 */ 1414NON_GPROF_ENTRY(__bb_init_func) 1415 movl 4(%esp),%eax 1416 movl $1,(%eax) 1417 .byte 0xc3 /* avoid macro for `ret' */ 1418