support.s revision 22975
1/*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 *	$Id$
 */

#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10		/* kernel data selector */
#define IDXSHIFT	10

	.data

/*
 * Indirect dispatch slots for the copy/zero primitives.  Each `.long'
 * holds the address of the routine currently implementing the primitive;
 * they are initialized here to the generic (plain i386) versions.
 * Callers reach them via `jmp *_..._vector' below.  (Presumably they are
 * repointed at CPU-identify time to the i486/i586 variants in this file —
 * the switching code is elsewhere; confirm in the MD startup code.)
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy

/*
 * Busy-flag for kernel (in-kernel FPU-register bcopy/bzero) use of the
 * npx.  0xfe means free.  Acquisition is `sarb $1': shifting 0xfe right
 * leaves carry clear (got it, value becomes 0xff = busy); shifting 0xff
 * leaves carry set (already busy).  Release is `movb $0xfe'.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

/*
 * Generic (any-CPU) bzero: clear whole longwords with `rep stosl',
 * then the 0-3 remaining bytes with `rep stosb'.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf (args shifted by the push) */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill value = 0 */
	shrl	$2,%ecx			/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* residual byte count */
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
/*
 * i486-tuned bzero: unrolled 64/16/4-byte stores, then a jump table
 * for the final 0-3 bytes.  %edx = cursor, %ecx = bytes remaining,
 * %eax = 0 throughout.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* exactly done? fall through to ret */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:					/* indexed by residual count 0-3 */
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* indirect through jtab[%ecx] */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#ifdef I586_CPU
/*
 * Pentium bzero: for large buffers, zero 8 bytes at a time through an
 * FPU register (fldz/fstl); otherwise fall back to `rep stos'.
 * %edx = buf, %ecx = len; %ax preserves the machine-status word (CR0_TS).
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx		/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try-lock; CF set if already held */
	jc	intreg_i586_bzero
	smsw	%ax			/* remember CR0_TS for restore */
	clts
	subl	$108,%esp		/* room for an FPU save area */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc		/* did we save an app's FPU state? */
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the zero we loaded */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* len again (args shifted by push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */

/* fillw(pat, base, cnt) -- store cnt copies of the 16-bit pat at base */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pat */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* cnt (in words) */
	cld
	rep
	stosw
	popl	%edi
	ret

/*
 * bcopyb(src, dst, cnt): byte-at-a-time copy that handles overlap by
 * copying backwards when dst is inside [src, src+cnt).
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f
	cld				/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* leave DF clear for callers */
	ret

/* bcopy/ovbcopy: dispatch through the per-CPU vectors set up above. */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx			/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx		/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi			/* step back to last whole longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#ifdef I586_CPU
/*
 * Pentium bcopy: for non-overlapping copies of >= 1024 bytes, move 64
 * bytes per iteration through the FPU registers (fildq/fistpq), with a
 * read-ahead pass to prime the cache.  Smaller or overlapping copies use
 * the duplicated `rep movs' code at the bottom.  %dx holds the saved
 * machine-status word (CR0_TS) across the FPU section.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try-lock the kernel FPU */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx
	clts
	subl	$108,%esp		/* save the app's FPU state */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx			/* total count; chunk is subtracted below */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* clamp chunk to ~half the D-cache */
2:
	subl	%ecx,0(%esp)		/* saved count -= this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* touch one longword per 32-byte line to pull src into the cache */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* remaining total count */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b			/* next cache-sized chunk */

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */

/*
 * Note: memcpy does not support overlapping copies
 * (argument order is (dst, src, cnt); returns dst in %eax)
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		/* dst */
	movl	16(%esp),%esi		/* src */
	movl	20(%esp),%ecx
	movl	%edi,%eax		/* return value = dst */
	shrl	$2,%ecx			/* copy by 32-bit words */
	cld				/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx			/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3.  For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0.  The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
/* copyout(from_kernel, to_user, len) -- returns 0 or EFAULT */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* from (kernel) */
	movl	20(%esp),%edi		/* to (user) */
	movl	24(%esp),%ebx		/* len */
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE offset */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx		/* PTE offset back to virtual address */
	pushl	%edx
	call	_trapwrite		/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax		/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b			/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:				/* shared tail for i586_copyout */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:				/* trap handler lands here via onfault */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx		/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx		/* big enough for fastmove()? */
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU */

/* copyin(from_user, to_kernel, len) -- returns 0 or EFAULT */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault		/* wrapped */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:				/* shared tail for i586_copyin */
#endif
	movb	%cl,%al			/* save low bits of len */
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#ifdef I586_CPU
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* caddr_t from */
	movl	16(%esp),%edi		/* caddr_t to */
	movl	20(%esp),%ecx		/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU */

#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * Bulk mover used by i586_copyin/i586_copyout: moves 64 bytes per
 * iteration through the FPU registers, saving the owning process's FPU
 * state into curpcb->pcb_savefpu around the move and stashing the old
 * pcb_savefpu contents in a stack temporary.  Frame layout:
 * PCB_SAVEFPU_SIZE bytes of FPU-state copy at (%esp), plus three
 * longword spill slots at -12/-8/-4(%ebp) for len/src/dst.
 * Small or misaligned requests go straight to fastmove_tail (rep movs).
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx		/* len (stack arg; src/dst in regs) */
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi			/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi			/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*    npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)		/* remember bytes beyond this chunk */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx		/* clamp the chunk size */
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	/* prime the cache: touch one longword per 32-byte line */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the deferred remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx			/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl			/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:				/* faulted inside the FPU loop */
	movl	_curpcb,%edi		/* put the saved pcb_savefpu back */
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp			/* pop fastmove's args, too */
	popl	%ebx			/* then unwind the caller's frame */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 * (returns the value, or -1 on fault via fusufault)
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx		/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay, good things will
 * happen later.  Fail all the time for now - until the trap code is
 * able to deal with this.
 */
/*
 * suswintr/fuswintr: interrupt-time user-short access stubs.
 * Always return failure (-1) for now, per the comment above.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:				/* shared fault exit: clear onfault, return -1 */
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 * (return 0 on success, -1 on fault via fusufault)
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)	/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl	/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx			/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations.  If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx			/* predecrement loop counter */
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx			/* don't count the NUL's slot twice */
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt		/* ran off the end of user space */
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes actually copied */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 *	kernel-to-kernel version of copyinstr: no fault recovery needed.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%edx		/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

/* bcmp(b1, b2, len): return 0 if equal, nonzero otherwise */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax		/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx			/* compare by longwords first */
	cld				/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx			/* then the 0-3 leftover bytes */
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/*
 * ssdtosd(*ssdp,*sdp): convert a software segment descriptor into the
 * hardware (scrambled-field) segment descriptor layout.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)		/* save ebx */
	movl	%esp,4(%eax)		/* save esp */
	movl	%ebp,8(%eax)		/* save ebp */
	movl	%esi,12(%eax)		/* save esi */
	movl	%edi,16(%eax)		/* save edi */
	movl	(%esp),%edx		/* get rta */
	movl	%edx,20(%eax)		/* save eip */
	xorl	%eax,%eax		/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx		/* restore ebx */
	movl	4(%eax),%esp		/* restore esp */
	movl	8(%eax),%ebp		/* restore ebp */
	movl	12(%eax),%esi		/* restore esi */
	movl	16(%eax),%edi		/* restore edi */
	movl	20(%eax),%edx		/* get rta */
	movl	%edx,(%esp)		/* put in return frame */
	xorl	%eax,%eax		/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3			/* avoid macro for `ret' */