/* support.s revision 169895 */
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28 * 29 * $FreeBSD: head/sys/i386/i386/support.s 169895 2007-05-23 08:33:06Z kib $ 30 */ 31 32#include "opt_npx.h" 33 34#include <machine/asmacros.h> 35#include <machine/cputypes.h> 36#include <machine/intr_machdep.h> 37#include <machine/pmap.h> 38#include <machine/specialreg.h> 39 40#include "assym.s" 41 42#define IDXSHIFT 10 43 44 .data 45 .globl bcopy_vector 46bcopy_vector: 47 .long generic_bcopy 48 .globl bzero_vector 49bzero_vector: 50 .long generic_bzero 51 .globl copyin_vector 52copyin_vector: 53 .long generic_copyin 54 .globl copyout_vector 55copyout_vector: 56 .long generic_copyout 57#if defined(I586_CPU) && defined(DEV_NPX) 58kernel_fpu_lock: 59 .byte 0xfe 60 .space 3 61#endif 62 ALIGN_DATA 63 .globl intrcnt, eintrcnt 64intrcnt: 65 .space INTRCNT_COUNT * 4 66eintrcnt: 67 68 .globl intrnames, eintrnames 69intrnames: 70 .space INTRCNT_COUNT * (MAXCOMLEN + 1) 71eintrnames: 72 73 .text 74 75/* 76 * bcopy family 77 * void bzero(void *buf, u_int len) 78 */ 79 80ENTRY(bzero) 81 MEXITCOUNT 82 jmp *bzero_vector 83 84ENTRY(generic_bzero) 85 pushl %edi 86 movl 8(%esp),%edi 87 movl 12(%esp),%ecx 88 xorl %eax,%eax 89 shrl $2,%ecx 90 cld 91 rep 92 stosl 93 movl 12(%esp),%ecx 94 andl $3,%ecx 95 rep 96 stosb 97 popl %edi 98 ret 99 100#ifdef I486_CPU 101ENTRY(i486_bzero) 102 movl 4(%esp),%edx 103 movl 8(%esp),%ecx 104 xorl %eax,%eax 105/* 106 * do 64 byte chunks first 107 * 108 * XXX this is probably over-unrolled at least for DX2's 109 */ 1102: 111 cmpl $64,%ecx 112 jb 3f 113 movl %eax,(%edx) 114 movl %eax,4(%edx) 115 movl %eax,8(%edx) 116 movl %eax,12(%edx) 117 movl %eax,16(%edx) 118 movl %eax,20(%edx) 119 movl %eax,24(%edx) 120 movl %eax,28(%edx) 121 movl %eax,32(%edx) 122 movl %eax,36(%edx) 123 movl %eax,40(%edx) 124 movl %eax,44(%edx) 125 movl %eax,48(%edx) 126 movl %eax,52(%edx) 127 movl %eax,56(%edx) 128 movl %eax,60(%edx) 129 addl $64,%edx 130 subl $64,%ecx 131 jnz 2b 132 ret 133 134/* 135 * do 16 byte chunks 136 */ 137 SUPERALIGN_TEXT 1383: 139 cmpl $16,%ecx 140 
jb 4f 141 movl %eax,(%edx) 142 movl %eax,4(%edx) 143 movl %eax,8(%edx) 144 movl %eax,12(%edx) 145 addl $16,%edx 146 subl $16,%ecx 147 jnz 3b 148 ret 149 150/* 151 * do 4 byte chunks 152 */ 153 SUPERALIGN_TEXT 1544: 155 cmpl $4,%ecx 156 jb 5f 157 movl %eax,(%edx) 158 addl $4,%edx 159 subl $4,%ecx 160 jnz 4b 161 ret 162 163/* 164 * do 1 byte chunks 165 * a jump table seems to be faster than a loop or more range reductions 166 * 167 * XXX need a const section for non-text 168 */ 169 .data 170jtab: 171 .long do0 172 .long do1 173 .long do2 174 .long do3 175 176 .text 177 SUPERALIGN_TEXT 1785: 179 jmp *jtab(,%ecx,4) 180 181 SUPERALIGN_TEXT 182do3: 183 movw %ax,(%edx) 184 movb %al,2(%edx) 185 ret 186 187 SUPERALIGN_TEXT 188do2: 189 movw %ax,(%edx) 190 ret 191 192 SUPERALIGN_TEXT 193do1: 194 movb %al,(%edx) 195 ret 196 197 SUPERALIGN_TEXT 198do0: 199 ret 200#endif 201 202#if defined(I586_CPU) && defined(DEV_NPX) 203ENTRY(i586_bzero) 204 movl 4(%esp),%edx 205 movl 8(%esp),%ecx 206 207 /* 208 * The FPU register method is twice as fast as the integer register 209 * method unless the target is in the L1 cache and we pre-allocate a 210 * cache line for it (then the integer register method is 4-5 times 211 * faster). However, we never pre-allocate cache lines, since that 212 * would make the integer method 25% or more slower for the common 213 * case when the target isn't in either the L1 cache or the L2 cache. 214 * Thus we normally use the FPU register method unless the overhead 215 * would be too large. 216 */ 217 cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ 218 jb intreg_i586_bzero 219 220 /* 221 * The FPU registers may belong to an application or to fastmove() 222 * or to another invocation of bcopy() or ourself in a higher level 223 * interrupt or trap handler. Preserving the registers is 224 * complicated since we avoid it if possible at all levels. We 225 * want to localize the complications even when that increases them. 
226 * Here the extra work involves preserving CR0_TS in TS. 227 * `fpcurthread != NULL' is supposed to be the condition that all the 228 * FPU resources belong to an application, but fpcurthread and CR0_TS 229 * aren't set atomically enough for this condition to work in 230 * interrupt handlers. 231 * 232 * Case 1: FPU registers belong to the application: we must preserve 233 * the registers if we use them, so we only use the FPU register 234 * method if the target size is large enough to amortize the extra 235 * overhead for preserving them. CR0_TS must be preserved although 236 * it is very likely to end up as set. 237 * 238 * Case 2: FPU registers belong to fastmove(): fastmove() currently 239 * makes the registers look like they belong to an application so 240 * that cpu_switch() and savectx() don't have to know about it, so 241 * this case reduces to case 1. 242 * 243 * Case 3: FPU registers belong to the kernel: don't use the FPU 244 * register method. This case is unlikely, and supporting it would 245 * be more complicated and might take too much stack. 246 * 247 * Case 4: FPU registers don't belong to anyone: the FPU registers 248 * don't need to be preserved, so we always use the FPU register 249 * method. CR0_TS must be preserved although it is very likely to 250 * always end up as clear. 251 */ 252 cmpl $0,PCPU(FPCURTHREAD) 253 je i586_bz1 254 255 /* 256 * XXX don't use the FPU for cases 1 and 2, since preemptive 257 * scheduling of ithreads broke these cases. Note that we can 258 * no longer get here from an interrupt handler, since the 259 * context sitch to the interrupt handler will have saved the 260 * FPU state. 
261 */ 262 jmp intreg_i586_bzero 263 264 cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ 265 jb intreg_i586_bzero 266 sarb $1,kernel_fpu_lock 267 jc intreg_i586_bzero 268 smsw %ax 269 clts 270 subl $108,%esp 271 fnsave 0(%esp) 272 jmp i586_bz2 273 274i586_bz1: 275 sarb $1,kernel_fpu_lock 276 jc intreg_i586_bzero 277 smsw %ax 278 clts 279 fninit /* XXX should avoid needing this */ 280i586_bz2: 281 fldz 282 283 /* 284 * Align to an 8 byte boundary (misalignment in the main loop would 285 * cost a factor of >= 2). Avoid jumps (at little cost if it is 286 * already aligned) by always zeroing 8 bytes and using the part up 287 * to the _next_ alignment position. 288 */ 289 fstl 0(%edx) 290 addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ 291 addl $8,%edx 292 andl $~7,%edx 293 subl %edx,%ecx 294 295 /* 296 * Similarly align `len' to a multiple of 8. 297 */ 298 fstl -8(%edx,%ecx) 299 decl %ecx 300 andl $~7,%ecx 301 302 /* 303 * This wouldn't be any faster if it were unrolled, since the loop 304 * control instructions are much faster than the fstl and/or done 305 * in parallel with it so their overhead is insignificant. 306 */ 307fpureg_i586_bzero_loop: 308 fstl 0(%edx) 309 addl $8,%edx 310 subl $8,%ecx 311 cmpl $8,%ecx 312 jae fpureg_i586_bzero_loop 313 314 cmpl $0,PCPU(FPCURTHREAD) 315 je i586_bz3 316 317 /* XXX check that the condition for cases 1-2 stayed false. */ 318i586_bzero_oops: 319 int $3 320 jmp i586_bzero_oops 321 322 frstor 0(%esp) 323 addl $108,%esp 324 lmsw %ax 325 movb $0xfe,kernel_fpu_lock 326 ret 327 328i586_bz3: 329 fstp %st(0) 330 lmsw %ax 331 movb $0xfe,kernel_fpu_lock 332 ret 333 334intreg_i586_bzero: 335 /* 336 * `rep stos' seems to be the best method in practice for small 337 * counts. Fancy methods usually take too long to start up due 338 * to cache and BTB misses. 
339 */ 340 pushl %edi 341 movl %edx,%edi 342 xorl %eax,%eax 343 shrl $2,%ecx 344 cld 345 rep 346 stosl 347 movl 12(%esp),%ecx 348 andl $3,%ecx 349 jne 1f 350 popl %edi 351 ret 352 3531: 354 rep 355 stosb 356 popl %edi 357 ret 358#endif /* I586_CPU && defined(DEV_NPX) */ 359 360ENTRY(sse2_pagezero) 361 pushl %ebx 362 movl 8(%esp),%ecx 363 movl %ecx,%eax 364 addl $4096,%eax 365 xor %ebx,%ebx 3661: 367 movnti %ebx,(%ecx) 368 addl $4,%ecx 369 cmpl %ecx,%eax 370 jne 1b 371 sfence 372 popl %ebx 373 ret 374 375ENTRY(i686_pagezero) 376 pushl %edi 377 pushl %ebx 378 379 movl 12(%esp),%edi 380 movl $1024,%ecx 381 cld 382 383 ALIGN_TEXT 3841: 385 xorl %eax,%eax 386 repe 387 scasl 388 jnz 2f 389 390 popl %ebx 391 popl %edi 392 ret 393 394 ALIGN_TEXT 395 3962: 397 incl %ecx 398 subl $4,%edi 399 400 movl %ecx,%edx 401 cmpl $16,%ecx 402 403 jge 3f 404 405 movl %edi,%ebx 406 andl $0x3f,%ebx 407 shrl %ebx 408 shrl %ebx 409 movl $16,%ecx 410 subl %ebx,%ecx 411 4123: 413 subl %ecx,%edx 414 rep 415 stosl 416 417 movl %edx,%ecx 418 testl %edx,%edx 419 jnz 1b 420 421 popl %ebx 422 popl %edi 423 ret 424 425/* fillw(pat, base, cnt) */ 426ENTRY(fillw) 427 pushl %edi 428 movl 8(%esp),%eax 429 movl 12(%esp),%edi 430 movl 16(%esp),%ecx 431 cld 432 rep 433 stosw 434 popl %edi 435 ret 436 437ENTRY(bcopyb) 438 pushl %esi 439 pushl %edi 440 movl 12(%esp),%esi 441 movl 16(%esp),%edi 442 movl 20(%esp),%ecx 443 movl %edi,%eax 444 subl %esi,%eax 445 cmpl %ecx,%eax /* overlapping && src < dst? */ 446 jb 1f 447 cld /* nope, copy forwards */ 448 rep 449 movsb 450 popl %edi 451 popl %esi 452 ret 453 454 ALIGN_TEXT 4551: 456 addl %ecx,%edi /* copy backwards. 
*/ 457 addl %ecx,%esi 458 decl %edi 459 decl %esi 460 std 461 rep 462 movsb 463 popl %edi 464 popl %esi 465 cld 466 ret 467 468ENTRY(bcopy) 469 MEXITCOUNT 470 jmp *bcopy_vector 471 472/* 473 * generic_bcopy(src, dst, cnt) 474 * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 475 */ 476ENTRY(generic_bcopy) 477 pushl %esi 478 pushl %edi 479 movl 12(%esp),%esi 480 movl 16(%esp),%edi 481 movl 20(%esp),%ecx 482 483 movl %edi,%eax 484 subl %esi,%eax 485 cmpl %ecx,%eax /* overlapping && src < dst? */ 486 jb 1f 487 488 shrl $2,%ecx /* copy by 32-bit words */ 489 cld /* nope, copy forwards */ 490 rep 491 movsl 492 movl 20(%esp),%ecx 493 andl $3,%ecx /* any bytes left? */ 494 rep 495 movsb 496 popl %edi 497 popl %esi 498 ret 499 500 ALIGN_TEXT 5011: 502 addl %ecx,%edi /* copy backwards */ 503 addl %ecx,%esi 504 decl %edi 505 decl %esi 506 andl $3,%ecx /* any fractional bytes? */ 507 std 508 rep 509 movsb 510 movl 20(%esp),%ecx /* copy remainder by 32-bit words */ 511 shrl $2,%ecx 512 subl $3,%esi 513 subl $3,%edi 514 rep 515 movsl 516 popl %edi 517 popl %esi 518 cld 519 ret 520 521#if defined(I586_CPU) && defined(DEV_NPX) 522ENTRY(i586_bcopy) 523 pushl %esi 524 pushl %edi 525 movl 12(%esp),%esi 526 movl 16(%esp),%edi 527 movl 20(%esp),%ecx 528 529 movl %edi,%eax 530 subl %esi,%eax 531 cmpl %ecx,%eax /* overlapping && src < dst? */ 532 jb 1f 533 534 cmpl $1024,%ecx 535 jb small_i586_bcopy 536 537 sarb $1,kernel_fpu_lock 538 jc small_i586_bcopy 539 cmpl $0,PCPU(FPCURTHREAD) 540 je i586_bc1 541 542 /* XXX turn off handling of cases 1-2, as above. 
*/ 543 movb $0xfe,kernel_fpu_lock 544 jmp small_i586_bcopy 545 546 smsw %dx 547 clts 548 subl $108,%esp 549 fnsave 0(%esp) 550 jmp 4f 551 552i586_bc1: 553 smsw %dx 554 clts 555 fninit /* XXX should avoid needing this */ 556 557 ALIGN_TEXT 5584: 559 pushl %ecx 560#define DCACHE_SIZE 8192 561 cmpl $(DCACHE_SIZE-512)/2,%ecx 562 jbe 2f 563 movl $(DCACHE_SIZE-512)/2,%ecx 5642: 565 subl %ecx,0(%esp) 566 cmpl $256,%ecx 567 jb 5f /* XXX should prefetch if %ecx >= 32 */ 568 pushl %esi 569 pushl %ecx 570 ALIGN_TEXT 5713: 572 movl 0(%esi),%eax 573 movl 32(%esi),%eax 574 movl 64(%esi),%eax 575 movl 96(%esi),%eax 576 movl 128(%esi),%eax 577 movl 160(%esi),%eax 578 movl 192(%esi),%eax 579 movl 224(%esi),%eax 580 addl $256,%esi 581 subl $256,%ecx 582 cmpl $256,%ecx 583 jae 3b 584 popl %ecx 585 popl %esi 5865: 587 ALIGN_TEXT 588large_i586_bcopy_loop: 589 fildq 0(%esi) 590 fildq 8(%esi) 591 fildq 16(%esi) 592 fildq 24(%esi) 593 fildq 32(%esi) 594 fildq 40(%esi) 595 fildq 48(%esi) 596 fildq 56(%esi) 597 fistpq 56(%edi) 598 fistpq 48(%edi) 599 fistpq 40(%edi) 600 fistpq 32(%edi) 601 fistpq 24(%edi) 602 fistpq 16(%edi) 603 fistpq 8(%edi) 604 fistpq 0(%edi) 605 addl $64,%esi 606 addl $64,%edi 607 subl $64,%ecx 608 cmpl $64,%ecx 609 jae large_i586_bcopy_loop 610 popl %eax 611 addl %eax,%ecx 612 cmpl $64,%ecx 613 jae 4b 614 615 cmpl $0,PCPU(FPCURTHREAD) 616 je i586_bc2 617 618 /* XXX check that the condition for cases 1-2 stayed false. */ 619i586_bcopy_oops: 620 int $3 621 jmp i586_bcopy_oops 622 623 frstor 0(%esp) 624 addl $108,%esp 625i586_bc2: 626 lmsw %dx 627 movb $0xfe,kernel_fpu_lock 628 629/* 630 * This is a duplicate of the main part of generic_bcopy. See the comments 631 * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and 632 * would mess up high resolution profiling. 
633 */ 634 ALIGN_TEXT 635small_i586_bcopy: 636 shrl $2,%ecx 637 cld 638 rep 639 movsl 640 movl 20(%esp),%ecx 641 andl $3,%ecx 642 rep 643 movsb 644 popl %edi 645 popl %esi 646 ret 647 648 ALIGN_TEXT 6491: 650 addl %ecx,%edi 651 addl %ecx,%esi 652 decl %edi 653 decl %esi 654 andl $3,%ecx 655 std 656 rep 657 movsb 658 movl 20(%esp),%ecx 659 shrl $2,%ecx 660 subl $3,%esi 661 subl $3,%edi 662 rep 663 movsl 664 popl %edi 665 popl %esi 666 cld 667 ret 668#endif /* I586_CPU && defined(DEV_NPX) */ 669 670/* 671 * Note: memcpy does not support overlapping copies 672 */ 673ENTRY(memcpy) 674 pushl %edi 675 pushl %esi 676 movl 12(%esp),%edi 677 movl 16(%esp),%esi 678 movl 20(%esp),%ecx 679 movl %edi,%eax 680 shrl $2,%ecx /* copy by 32-bit words */ 681 cld /* nope, copy forwards */ 682 rep 683 movsl 684 movl 20(%esp),%ecx 685 andl $3,%ecx /* any bytes left? */ 686 rep 687 movsb 688 popl %esi 689 popl %edi 690 ret 691 692 693/*****************************************************************************/ 694/* copyout and fubyte family */ 695/*****************************************************************************/ 696/* 697 * Access user memory from inside the kernel. These routines and possibly 698 * the math- and DOS emulators should be the only places that do this. 699 * 700 * We have to access the memory with user's permissions, so use a segment 701 * selector with RPL 3. For writes to user space we have to additionally 702 * check the PTE for write permission, because the 386 does not check 703 * write permissions when we are executing with EPL 0. The 486 does check 704 * this if the WP bit is set in CR0, so we can use a simpler version here. 705 * 706 * These routines set curpcb->onfault for the time they execute. When a 707 * protection violation occurs inside the functions, the trap handler 708 * returns to *curpcb->onfault instead of the function. 
709 */ 710 711/* 712 * copyout(from_kernel, to_user, len) - MP SAFE 713 */ 714ENTRY(copyout) 715 MEXITCOUNT 716 jmp *copyout_vector 717 718ENTRY(generic_copyout) 719 movl PCPU(CURPCB),%eax 720 movl $copyout_fault,PCB_ONFAULT(%eax) 721 pushl %esi 722 pushl %edi 723 pushl %ebx 724 movl 16(%esp),%esi 725 movl 20(%esp),%edi 726 movl 24(%esp),%ebx 727 testl %ebx,%ebx /* anything to do? */ 728 jz done_copyout 729 730 /* 731 * Check explicitly for non-user addresses. If 486 write protection 732 * is being used, this check is essential because we are in kernel 733 * mode so the h/w does not provide any protection against writing 734 * kernel addresses. 735 */ 736 737 /* 738 * First, prevent address wrapping. 739 */ 740 movl %edi,%eax 741 addl %ebx,%eax 742 jc copyout_fault 743/* 744 * XXX STOP USING VM_MAXUSER_ADDRESS. 745 * It is an end address, not a max, so every time it is used correctly it 746 * looks like there is an off by one error, and of course it caused an off 747 * by one error in several places. 748 */ 749 cmpl $VM_MAXUSER_ADDRESS,%eax 750 ja copyout_fault 751 752 /* bcopy(%esi, %edi, %ebx) */ 753 movl %ebx,%ecx 754 755#if defined(I586_CPU) && defined(DEV_NPX) 756 ALIGN_TEXT 757slow_copyout: 758#endif 759 shrl $2,%ecx 760 cld 761 rep 762 movsl 763 movb %bl,%cl 764 andb $3,%cl 765 rep 766 movsb 767 768done_copyout: 769 popl %ebx 770 popl %edi 771 popl %esi 772 xorl %eax,%eax 773 movl PCPU(CURPCB),%edx 774 movl %eax,PCB_ONFAULT(%edx) 775 ret 776 777 ALIGN_TEXT 778copyout_fault: 779 popl %ebx 780 popl %edi 781 popl %esi 782 movl PCPU(CURPCB),%edx 783 movl $0,PCB_ONFAULT(%edx) 784 movl $EFAULT,%eax 785 ret 786 787#if defined(I586_CPU) && defined(DEV_NPX) 788ENTRY(i586_copyout) 789 /* 790 * Duplicated from generic_copyout. Could be done a bit better. 
791 */ 792 movl PCPU(CURPCB),%eax 793 movl $copyout_fault,PCB_ONFAULT(%eax) 794 pushl %esi 795 pushl %edi 796 pushl %ebx 797 movl 16(%esp),%esi 798 movl 20(%esp),%edi 799 movl 24(%esp),%ebx 800 testl %ebx,%ebx /* anything to do? */ 801 jz done_copyout 802 803 /* 804 * Check explicitly for non-user addresses. If 486 write protection 805 * is being used, this check is essential because we are in kernel 806 * mode so the h/w does not provide any protection against writing 807 * kernel addresses. 808 */ 809 810 /* 811 * First, prevent address wrapping. 812 */ 813 movl %edi,%eax 814 addl %ebx,%eax 815 jc copyout_fault 816/* 817 * XXX STOP USING VM_MAXUSER_ADDRESS. 818 * It is an end address, not a max, so every time it is used correctly it 819 * looks like there is an off by one error, and of course it caused an off 820 * by one error in several places. 821 */ 822 cmpl $VM_MAXUSER_ADDRESS,%eax 823 ja copyout_fault 824 825 /* bcopy(%esi, %edi, %ebx) */ 8263: 827 movl %ebx,%ecx 828 /* 829 * End of duplicated code. 
830 */ 831 832 cmpl $1024,%ecx 833 jb slow_copyout 834 835 pushl %ecx 836 call fastmove 837 addl $4,%esp 838 jmp done_copyout 839#endif /* I586_CPU && defined(DEV_NPX) */ 840 841/* 842 * copyin(from_user, to_kernel, len) - MP SAFE 843 */ 844ENTRY(copyin) 845 MEXITCOUNT 846 jmp *copyin_vector 847 848ENTRY(generic_copyin) 849 movl PCPU(CURPCB),%eax 850 movl $copyin_fault,PCB_ONFAULT(%eax) 851 pushl %esi 852 pushl %edi 853 movl 12(%esp),%esi /* caddr_t from */ 854 movl 16(%esp),%edi /* caddr_t to */ 855 movl 20(%esp),%ecx /* size_t len */ 856 857 /* 858 * make sure address is valid 859 */ 860 movl %esi,%edx 861 addl %ecx,%edx 862 jc copyin_fault 863 cmpl $VM_MAXUSER_ADDRESS,%edx 864 ja copyin_fault 865 866#if defined(I586_CPU) && defined(DEV_NPX) 867 ALIGN_TEXT 868slow_copyin: 869#endif 870 movb %cl,%al 871 shrl $2,%ecx /* copy longword-wise */ 872 cld 873 rep 874 movsl 875 movb %al,%cl 876 andb $3,%cl /* copy remaining bytes */ 877 rep 878 movsb 879 880#if defined(I586_CPU) && defined(DEV_NPX) 881 ALIGN_TEXT 882done_copyin: 883#endif 884 popl %edi 885 popl %esi 886 xorl %eax,%eax 887 movl PCPU(CURPCB),%edx 888 movl %eax,PCB_ONFAULT(%edx) 889 ret 890 891 ALIGN_TEXT 892copyin_fault: 893 popl %edi 894 popl %esi 895 movl PCPU(CURPCB),%edx 896 movl $0,PCB_ONFAULT(%edx) 897 movl $EFAULT,%eax 898 ret 899 900#if defined(I586_CPU) && defined(DEV_NPX) 901ENTRY(i586_copyin) 902 /* 903 * Duplicated from generic_copyin. Could be done a bit better. 904 */ 905 movl PCPU(CURPCB),%eax 906 movl $copyin_fault,PCB_ONFAULT(%eax) 907 pushl %esi 908 pushl %edi 909 movl 12(%esp),%esi /* caddr_t from */ 910 movl 16(%esp),%edi /* caddr_t to */ 911 movl 20(%esp),%ecx /* size_t len */ 912 913 /* 914 * make sure address is valid 915 */ 916 movl %esi,%edx 917 addl %ecx,%edx 918 jc copyin_fault 919 cmpl $VM_MAXUSER_ADDRESS,%edx 920 ja copyin_fault 921 /* 922 * End of duplicated code. 
923 */ 924 925 cmpl $1024,%ecx 926 jb slow_copyin 927 928 pushl %ebx /* XXX prepare for fastmove_fault */ 929 pushl %ecx 930 call fastmove 931 addl $8,%esp 932 jmp done_copyin 933#endif /* I586_CPU && defined(DEV_NPX) */ 934 935#if defined(I586_CPU) && defined(DEV_NPX) 936/* fastmove(src, dst, len) 937 src in %esi 938 dst in %edi 939 len in %ecx XXX changed to on stack for profiling 940 uses %eax and %edx for tmp. storage 941 */ 942/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ 943ENTRY(fastmove) 944 pushl %ebp 945 movl %esp,%ebp 946 subl $PCB_SAVEFPU_SIZE+3*4,%esp 947 948 movl 8(%ebp),%ecx 949 cmpl $63,%ecx 950 jbe fastmove_tail 951 952 testl $7,%esi /* check if src addr is multiple of 8 */ 953 jnz fastmove_tail 954 955 testl $7,%edi /* check if dst addr is multiple of 8 */ 956 jnz fastmove_tail 957 958 /* XXX grab FPU context atomically. */ 959 cli 960 961/* if (fpcurthread != NULL) { */ 962 cmpl $0,PCPU(FPCURTHREAD) 963 je 6f 964/* fnsave(&curpcb->pcb_savefpu); */ 965 movl PCPU(CURPCB),%eax 966 fnsave PCB_SAVEFPU(%eax) 967/* FPCURTHREAD = NULL; */ 968 movl $0,PCPU(FPCURTHREAD) 969/* } */ 9706: 971/* now we own the FPU. */ 972 973/* 974 * The process' FP state is saved in the pcb, but if we get 975 * switched, the cpu_switch() will store our FP state in the 976 * pcb. It should be possible to avoid all the copying for 977 * this, e.g., by setting a flag to tell cpu_switch() to 978 * save the state somewhere else. 979 */ 980/* tmp = curpcb->pcb_savefpu; */ 981 movl %ecx,-12(%ebp) 982 movl %esi,-8(%ebp) 983 movl %edi,-4(%ebp) 984 movl %esp,%edi 985 movl PCPU(CURPCB),%esi 986 addl $PCB_SAVEFPU,%esi 987 cld 988 movl $PCB_SAVEFPU_SIZE>>2,%ecx 989 rep 990 movsl 991 movl -12(%ebp),%ecx 992 movl -8(%ebp),%esi 993 movl -4(%ebp),%edi 994/* stop_emulating(); */ 995 clts 996/* fpcurthread = curthread; */ 997 movl PCPU(CURTHREAD),%eax 998 movl %eax,PCPU(FPCURTHREAD) 999 movl PCPU(CURPCB),%eax 1000 1001 /* XXX end of atomic FPU context grab. 
*/ 1002 sti 1003 1004 movl $fastmove_fault,PCB_ONFAULT(%eax) 10054: 1006 movl %ecx,-12(%ebp) 1007 cmpl $1792,%ecx 1008 jbe 2f 1009 movl $1792,%ecx 10102: 1011 subl %ecx,-12(%ebp) 1012 cmpl $256,%ecx 1013 jb 5f 1014 movl %ecx,-8(%ebp) 1015 movl %esi,-4(%ebp) 1016 ALIGN_TEXT 10173: 1018 movl 0(%esi),%eax 1019 movl 32(%esi),%eax 1020 movl 64(%esi),%eax 1021 movl 96(%esi),%eax 1022 movl 128(%esi),%eax 1023 movl 160(%esi),%eax 1024 movl 192(%esi),%eax 1025 movl 224(%esi),%eax 1026 addl $256,%esi 1027 subl $256,%ecx 1028 cmpl $256,%ecx 1029 jae 3b 1030 movl -8(%ebp),%ecx 1031 movl -4(%ebp),%esi 10325: 1033 ALIGN_TEXT 1034fastmove_loop: 1035 fildq 0(%esi) 1036 fildq 8(%esi) 1037 fildq 16(%esi) 1038 fildq 24(%esi) 1039 fildq 32(%esi) 1040 fildq 40(%esi) 1041 fildq 48(%esi) 1042 fildq 56(%esi) 1043 fistpq 56(%edi) 1044 fistpq 48(%edi) 1045 fistpq 40(%edi) 1046 fistpq 32(%edi) 1047 fistpq 24(%edi) 1048 fistpq 16(%edi) 1049 fistpq 8(%edi) 1050 fistpq 0(%edi) 1051 addl $-64,%ecx 1052 addl $64,%esi 1053 addl $64,%edi 1054 cmpl $63,%ecx 1055 ja fastmove_loop 1056 movl -12(%ebp),%eax 1057 addl %eax,%ecx 1058 cmpl $64,%ecx 1059 jae 4b 1060 1061 /* XXX ungrab FPU context atomically. */ 1062 cli 1063 1064/* curpcb->pcb_savefpu = tmp; */ 1065 movl %ecx,-12(%ebp) 1066 movl %esi,-8(%ebp) 1067 movl %edi,-4(%ebp) 1068 movl PCPU(CURPCB),%edi 1069 addl $PCB_SAVEFPU,%edi 1070 movl %esp,%esi 1071 cld 1072 movl $PCB_SAVEFPU_SIZE>>2,%ecx 1073 rep 1074 movsl 1075 movl -12(%ebp),%ecx 1076 movl -8(%ebp),%esi 1077 movl -4(%ebp),%edi 1078 1079/* start_emulating(); */ 1080 smsw %ax 1081 orb $CR0_TS,%al 1082 lmsw %ax 1083/* fpcurthread = NULL; */ 1084 movl $0,PCPU(FPCURTHREAD) 1085 1086 /* XXX end of atomic FPU context ungrab. 
*/ 1087 sti 1088 1089 ALIGN_TEXT 1090fastmove_tail: 1091 movl PCPU(CURPCB),%eax 1092 movl $fastmove_tail_fault,PCB_ONFAULT(%eax) 1093 1094 movb %cl,%al 1095 shrl $2,%ecx /* copy longword-wise */ 1096 cld 1097 rep 1098 movsl 1099 movb %al,%cl 1100 andb $3,%cl /* copy remaining bytes */ 1101 rep 1102 movsb 1103 1104 movl %ebp,%esp 1105 popl %ebp 1106 ret 1107 1108 ALIGN_TEXT 1109fastmove_fault: 1110 /* XXX ungrab FPU context atomically. */ 1111 cli 1112 1113 movl PCPU(CURPCB),%edi 1114 addl $PCB_SAVEFPU,%edi 1115 movl %esp,%esi 1116 cld 1117 movl $PCB_SAVEFPU_SIZE>>2,%ecx 1118 rep 1119 movsl 1120 1121 smsw %ax 1122 orb $CR0_TS,%al 1123 lmsw %ax 1124 movl $0,PCPU(FPCURTHREAD) 1125 1126 /* XXX end of atomic FPU context ungrab. */ 1127 sti 1128 1129fastmove_tail_fault: 1130 movl %ebp,%esp 1131 popl %ebp 1132 addl $8,%esp 1133 popl %ebx 1134 popl %edi 1135 popl %esi 1136 movl PCPU(CURPCB),%edx 1137 movl $0,PCB_ONFAULT(%edx) 1138 movl $EFAULT,%eax 1139 ret 1140#endif /* I586_CPU && defined(DEV_NPX) */ 1141 1142/* 1143 * casuword. Compare and set user word. Returns -1 or the current value. 1144 */ 1145 1146ALTENTRY(casuword32) 1147ENTRY(casuword) 1148 movl PCPU(CURPCB),%ecx 1149 movl $fusufault,PCB_ONFAULT(%ecx) 1150 movl 4(%esp),%edx /* dst */ 1151 movl 8(%esp),%eax /* old */ 1152 movl 12(%esp),%ecx /* new */ 1153 1154 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ 1155 ja fusufault 1156 1157#ifdef SMP 1158 lock 1159#endif 1160 cmpxchgl %ecx,(%edx) /* Compare and set. */ 1161 1162 /* 1163 * The old value is in %eax. If the store succeeded it will be the 1164 * value we expected (old) from before the store, otherwise it will 1165 * be the current value. 1166 */ 1167 1168 movl PCPU(CURPCB),%ecx 1169 movl $fusufault,PCB_ONFAULT(%ecx) 1170 movl $0,PCB_ONFAULT(%ecx) 1171 ret 1172 1173/* 1174 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user 1175 * memory. All these functions are MPSAFE. 
1176 */ 1177 1178ALTENTRY(fuword32) 1179ENTRY(fuword) 1180 movl PCPU(CURPCB),%ecx 1181 movl $fusufault,PCB_ONFAULT(%ecx) 1182 movl 4(%esp),%edx /* from */ 1183 1184 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ 1185 ja fusufault 1186 1187 movl (%edx),%eax 1188 movl $0,PCB_ONFAULT(%ecx) 1189 ret 1190 1191/* 1192 * fuswintr() and suswintr() are specialized variants of fuword16() and 1193 * suword16(), respectively. They are called from the profiling code, 1194 * potentially at interrupt time. If they fail, that's okay; good things 1195 * will happen later. They always fail for now, until the trap code is 1196 * able to deal with this. 1197 */ 1198ALTENTRY(suswintr) 1199ENTRY(fuswintr) 1200 movl $-1,%eax 1201 ret 1202 1203ENTRY(fuword16) 1204 movl PCPU(CURPCB),%ecx 1205 movl $fusufault,PCB_ONFAULT(%ecx) 1206 movl 4(%esp),%edx 1207 1208 cmpl $VM_MAXUSER_ADDRESS-2,%edx 1209 ja fusufault 1210 1211 movzwl (%edx),%eax 1212 movl $0,PCB_ONFAULT(%ecx) 1213 ret 1214 1215ENTRY(fubyte) 1216 movl PCPU(CURPCB),%ecx 1217 movl $fusufault,PCB_ONFAULT(%ecx) 1218 movl 4(%esp),%edx 1219 1220 cmpl $VM_MAXUSER_ADDRESS-1,%edx 1221 ja fusufault 1222 1223 movzbl (%edx),%eax 1224 movl $0,PCB_ONFAULT(%ecx) 1225 ret 1226 1227 ALIGN_TEXT 1228fusufault: 1229 movl PCPU(CURPCB),%ecx 1230 xorl %eax,%eax 1231 movl %eax,PCB_ONFAULT(%ecx) 1232 decl %eax 1233 ret 1234 1235/* 1236 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. 1237 * All these functions are MPSAFE. 
1238 */ 1239 1240ALTENTRY(suword32) 1241ENTRY(suword) 1242 movl PCPU(CURPCB),%ecx 1243 movl $fusufault,PCB_ONFAULT(%ecx) 1244 movl 4(%esp),%edx 1245 1246 cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ 1247 ja fusufault 1248 1249 movl 8(%esp),%eax 1250 movl %eax,(%edx) 1251 xorl %eax,%eax 1252 movl PCPU(CURPCB),%ecx 1253 movl %eax,PCB_ONFAULT(%ecx) 1254 ret 1255 1256ENTRY(suword16) 1257 movl PCPU(CURPCB),%ecx 1258 movl $fusufault,PCB_ONFAULT(%ecx) 1259 movl 4(%esp),%edx 1260 1261 cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ 1262 ja fusufault 1263 1264 movw 8(%esp),%ax 1265 movw %ax,(%edx) 1266 xorl %eax,%eax 1267 movl PCPU(CURPCB),%ecx /* restore trashed register */ 1268 movl %eax,PCB_ONFAULT(%ecx) 1269 ret 1270 1271ENTRY(subyte) 1272 movl PCPU(CURPCB),%ecx 1273 movl $fusufault,PCB_ONFAULT(%ecx) 1274 movl 4(%esp),%edx 1275 1276 cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ 1277 ja fusufault 1278 1279 movb 8(%esp),%al 1280 movb %al,(%edx) 1281 xorl %eax,%eax 1282 movl PCPU(CURPCB),%ecx /* restore trashed register */ 1283 movl %eax,PCB_ONFAULT(%ecx) 1284 ret 1285 1286/* 1287 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE 1288 * 1289 * copy a string from from to to, stop when a 0 character is reached. 1290 * return ENAMETOOLONG if string is longer than maxlen, and 1291 * EFAULT on protection violations. If lencopied is non-zero, 1292 * return the actual length in *lencopied. 
1293 */ 1294ENTRY(copyinstr) 1295 pushl %esi 1296 pushl %edi 1297 movl PCPU(CURPCB),%ecx 1298 movl $cpystrflt,PCB_ONFAULT(%ecx) 1299 1300 movl 12(%esp),%esi /* %esi = from */ 1301 movl 16(%esp),%edi /* %edi = to */ 1302 movl 20(%esp),%edx /* %edx = maxlen */ 1303 1304 movl $VM_MAXUSER_ADDRESS,%eax 1305 1306 /* make sure 'from' is within bounds */ 1307 subl %esi,%eax 1308 jbe cpystrflt 1309 1310 /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ 1311 cmpl %edx,%eax 1312 jae 1f 1313 movl %eax,%edx 1314 movl %eax,20(%esp) 13151: 1316 incl %edx 1317 cld 1318 13192: 1320 decl %edx 1321 jz 3f 1322 1323 lodsb 1324 stosb 1325 orb %al,%al 1326 jnz 2b 1327 1328 /* Success -- 0 byte reached */ 1329 decl %edx 1330 xorl %eax,%eax 1331 jmp cpystrflt_x 13323: 1333 /* edx is zero - return ENAMETOOLONG or EFAULT */ 1334 cmpl $VM_MAXUSER_ADDRESS,%esi 1335 jae cpystrflt 13364: 1337 movl $ENAMETOOLONG,%eax 1338 jmp cpystrflt_x 1339 1340cpystrflt: 1341 movl $EFAULT,%eax 1342 1343cpystrflt_x: 1344 /* set *lencopied and return %eax */ 1345 movl PCPU(CURPCB),%ecx 1346 movl $0,PCB_ONFAULT(%ecx) 1347 movl 20(%esp),%ecx 1348 subl %edx,%ecx 1349 movl 24(%esp),%edx 1350 testl %edx,%edx 1351 jz 1f 1352 movl %ecx,(%edx) 13531: 1354 popl %edi 1355 popl %esi 1356 ret 1357 1358 1359/* 1360 * copystr(from, to, maxlen, int *lencopied) - MP SAFE 1361 */ 1362ENTRY(copystr) 1363 pushl %esi 1364 pushl %edi 1365 1366 movl 12(%esp),%esi /* %esi = from */ 1367 movl 16(%esp),%edi /* %edi = to */ 1368 movl 20(%esp),%edx /* %edx = maxlen */ 1369 incl %edx 1370 cld 13711: 1372 decl %edx 1373 jz 4f 1374 lodsb 1375 stosb 1376 orb %al,%al 1377 jnz 1b 1378 1379 /* Success -- 0 byte reached */ 1380 decl %edx 1381 xorl %eax,%eax 1382 jmp 6f 13834: 1384 /* edx is zero -- return ENAMETOOLONG */ 1385 movl $ENAMETOOLONG,%eax 1386 13876: 1388 /* set *lencopied and return %eax */ 1389 movl 20(%esp),%ecx 1390 subl %edx,%ecx 1391 movl 24(%esp),%edx 1392 testl %edx,%edx 1393 jz 7f 1394 movl %ecx,(%edx) 13957: 1396 popl %edi 
1397 popl %esi 1398 ret 1399 1400ENTRY(bcmp) 1401 pushl %edi 1402 pushl %esi 1403 movl 12(%esp),%edi 1404 movl 16(%esp),%esi 1405 movl 20(%esp),%edx 1406 1407 movl %edx,%ecx 1408 shrl $2,%ecx 1409 cld /* compare forwards */ 1410 repe 1411 cmpsl 1412 jne 1f 1413 1414 movl %edx,%ecx 1415 andl $3,%ecx 1416 repe 1417 cmpsb 14181: 1419 setne %al 1420 movsbl %al,%eax 1421 popl %esi 1422 popl %edi 1423 ret 1424 1425 1426/* 1427 * Handling of special 386 registers and descriptor tables etc 1428 */ 1429/* void lgdt(struct region_descriptor *rdp); */ 1430ENTRY(lgdt) 1431 /* reload the descriptor table */ 1432 movl 4(%esp),%eax 1433 lgdt (%eax) 1434 1435 /* flush the prefetch q */ 1436 jmp 1f 1437 nop 14381: 1439 /* reload "stale" selectors */ 1440 movl $KDSEL,%eax 1441 movl %eax,%ds 1442 movl %eax,%es 1443 movl %eax,%gs 1444 movl %eax,%ss 1445 movl $KPSEL,%eax 1446 movl %eax,%fs 1447 1448 /* reload code selector by turning return into intersegmental return */ 1449 movl (%esp),%eax 1450 pushl %eax 1451 movl $KCSEL,4(%esp) 1452 MEXITCOUNT 1453 lret 1454 1455/* ssdtosd(*ssdp,*sdp) */ 1456ENTRY(ssdtosd) 1457 pushl %ebx 1458 movl 8(%esp),%ecx 1459 movl 8(%ecx),%ebx 1460 shll $16,%ebx 1461 movl (%ecx),%edx 1462 roll $16,%edx 1463 movb %dh,%bl 1464 movb %dl,%bh 1465 rorl $8,%ebx 1466 movl 4(%ecx),%eax 1467 movw %ax,%dx 1468 andl $0xf0000,%eax 1469 orl %eax,%ebx 1470 movl 12(%esp),%ecx 1471 movl %edx,(%ecx) 1472 movl %ebx,4(%ecx) 1473 popl %ebx 1474 ret 1475 1476/* void reset_dbregs() */ 1477ENTRY(reset_dbregs) 1478 movl $0,%eax 1479 movl %eax,%dr7 /* disable all breapoints first */ 1480 movl %eax,%dr0 1481 movl %eax,%dr1 1482 movl %eax,%dr2 1483 movl %eax,%dr3 1484 movl %eax,%dr6 1485 ret 1486 1487/*****************************************************************************/ 1488/* setjump, longjump */ 1489/*****************************************************************************/ 1490 1491ENTRY(setjmp) 1492 movl 4(%esp),%eax 1493 movl %ebx,(%eax) /* save ebx */ 1494 movl 
%esp,4(%eax) /* save esp */ 1495 movl %ebp,8(%eax) /* save ebp */ 1496 movl %esi,12(%eax) /* save esi */ 1497 movl %edi,16(%eax) /* save edi */ 1498 movl (%esp),%edx /* get rta */ 1499 movl %edx,20(%eax) /* save eip */ 1500 xorl %eax,%eax /* return(0); */ 1501 ret 1502 1503ENTRY(longjmp) 1504 movl 4(%esp),%eax 1505 movl (%eax),%ebx /* restore ebx */ 1506 movl 4(%eax),%esp /* restore esp */ 1507 movl 8(%eax),%ebp /* restore ebp */ 1508 movl 12(%eax),%esi /* restore esi */ 1509 movl 16(%eax),%edi /* restore edi */ 1510 movl 20(%eax),%edx /* get rta */ 1511 movl %edx,(%esp) /* put in return frame */ 1512 xorl %eax,%eax /* return(1); */ 1513 incl %eax 1514 ret 1515 1516/* 1517 * Support for BB-profiling (gcc -a). The kernbb program will extract 1518 * the data from the kernel. 1519 */ 1520 1521 .data 1522 ALIGN_DATA 1523 .globl bbhead 1524bbhead: 1525 .long 0 1526 1527 .text 1528NON_GPROF_ENTRY(__bb_init_func) 1529 movl 4(%esp),%eax 1530 movl $1,(%eax) 1531 movl bbhead,%edx 1532 movl %edx,16(%eax) 1533 movl %eax,bbhead 1534 NON_GPROF_RET 1535